diff --git a/.travis.yml b/.travis.yml
index 5622b1bbf4e6..8f01657011a0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,8 +15,7 @@ install:
   - pip install aura.tar.gz
 
 script:
-  - python build.py --distro openshift-enterprise --product "OpenShift Container Platform" --version 4.3 --no-upstream-fetch
-  - python makeBuild.py
+  - python build.py --distro openshift-enterprise --product "OpenShift Container Platform" --version 4.3 --no-upstream-fetch && python makeBuild.py
 
 after_success:
   - bash autopreview.sh
diff --git a/build.py b/build.py
index 6842b435ca13..225697e51983 100755
--- a/build.py
+++ b/build.py
@@ -578,29 +578,35 @@ def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
         link_anchor = link.group(2)
         link_title = link.group(3)
 
-
         if link_file is not None:
             fixed_link_file = link_file.replace(".html", ".adoc")
             fixed_link_file_abs = os.path.abspath(os.path.join(current_dir, fixed_link_file))
             if fixed_link_file_abs in file_to_id_map:
+                if fixed_link_file_abs.startswith(book_dir + os.sep) or fixed_link_file_abs == src_file:
+                    # We are dealing with a cross reference within the same book here
+                    if link_anchor is None:
+                        # Cross reference to the top of a topic, without an id being specified
+                        link_anchor = "#" + file_to_id_map[fixed_link_file_abs]
 
-                # We are dealing with a cross reference to another book here
-                external_link = EXTERNAL_LINK_RE.search(link_file)
-                book_dir_name = external_link.group(1)
-
-                # Find the book name
-                book_name = book_dir_name
-                for book in info['data']:
-                    if check_node_distro_matches(book, info['distro']) and book['Dir'] == book_dir_name:
-                        book_name = book['Name']
-                        break
+                    fixed_link = "xref:" + link_anchor.replace("#", "") + link_title
+                else:
+                    # We are dealing with a cross reference to another book here
+                    external_link = EXTERNAL_LINK_RE.search(link_file)
+                    book_dir_name = external_link.group(1)
+
+                    # Find the book name
+                    book_name = book_dir_name
+                    for book in info['data']:
+                        if check_node_distro_matches(book, info['distro']) and book['Dir'] == book_dir_name:
+                            book_name = book['Name']
+                            break
 
-                fixed_link_file = BASE_PORTAL_URL + build_portal_url(info, book_name)
+                    fixed_link_file = BASE_PORTAL_URL + build_portal_url(info, book_name)
 
-                if link_anchor is None:
-                    fixed_link = "link:" + fixed_link_file + "#" + file_to_id_map[fixed_link_file_abs] + link_title
-                else:
-                    fixed_link = "link:" + fixed_link_file + link_anchor + link_title
+                    if link_anchor is None:
+                        fixed_link = "link:" + fixed_link_file + "#" + file_to_id_map[fixed_link_file_abs] + link_title
+                    else:
+                        fixed_link = "link:" + fixed_link_file + link_anchor + link_title
             else:
                 # Cross reference or link that isn't in the docs suite
                 fixed_link = link_text
@@ -608,6 +614,7 @@ def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
                 rel_src_file = src_file.replace(os.path.dirname(book_dir) + "/", "")
                 has_errors = True
                 log.error("ERROR (%s): \"%s\" appears to try to reference a file not included in the \"%s\" distro", rel_src_file, link_text.replace("\n", ""), info['distro'])
+                sys.exit(-1)
             else:
                 fixed_link = "xref:" + link_anchor.replace("#", "") + link_title
 
diff --git a/build_for_portal.py b/build_for_portal.py
new file mode 100644
index 000000000000..6842b435ca13
--- /dev/null
+++ b/build_for_portal.py
@@ -0,0 +1,973 @@
+#!/usr/bin/python
+
+import argparse
+import ConfigParser
+import filecmp
+import fnmatch
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import time
+import yaml
+
+from aura import cli
+
+cli.init_logging(False, True)
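+
+# Module-level build state: has_errors is set by _fix_links() when a broken
+# cross reference is found and checked at the end of main(); the regexes
+# below parse AsciiDoc ids, links, includes and conditional blocks.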
+has_errors = False
+CLONE_DIR = "."
+BASE_PORTAL_URL = "https://access.redhat.com/documentation/en-us/"
+# ID_RE = re.compile("^\[(?:\[|id=\'|#)(.*?)(\'?,.*?)?(?:\]|\')?\]", re.M | re.DOTALL)
+ID_RE = re.compile("^\[(?:\[|id=\'|#|id=\")(.*?)(\'?,.*?)?(?:\]|\'|\")?\]", re.M | re.DOTALL)
+LINKS_RE = re.compile("(?:xref|link):([\./\w_-]*/?[\w_.-]*\.(?:html|adoc))?(#[\w_-]*)?(\[.*?\])", re.M | re.DOTALL)
+EXTERNAL_LINK_RE = re.compile("[\./]*([\w_-]+)/[\w_/-]*?([\w_.-]*\.(?:html|adoc))", re.DOTALL)
+INCLUDE_RE = re.compile("include::(.*?)\[(.*?)\]", re.M)
+IFDEF_RE = re.compile(r"^if(n?)def::(.*?)\[\]", re.M)
+ENDIF_RE = re.compile(r"^endif::(.*?)\[\]\r?\n", re.M)
+COMMENT_CONTENT_RE = re.compile(r"^^////$.*?^////$", re.M | re.DOTALL)
+TAG_CONTENT_RE = re.compile(r"//\s+tag::(.*?)\[\].*?// end::(.*?)\[\]", re.M | re.DOTALL)
+CMP_IGNORE_FILES = [".git", ".gitignore", "README.md", "build.cfg"]
+DEVNULL = open(os.devnull, 'wb')
+
+
+MASTER_FILE_BASE = "= {title}\n\
+:product-author: {product-author}\n\
+:product-title: {product}\n\
+:product-version: {product-version}\n\
+:{distro}:\n\
+:imagesdir: images\n\
+:idseparator: -\n\
+{preface-title}\n"
+
+DOCINFO_BASE = "<title>{title}</title>\n\
+<productname>{{product-title}}</productname>\n\
+<productnumber>{{product-version}}</productnumber>\n\
+<subtitle>Enter a short description here.</subtitle>\n\
+<abstract>\n\
+    <para>A short overview and summary of the book's subject and purpose, traditionally no more than one paragraph long.</para>\n\
+</abstract>\n\
+<authorgroup>\n\
+    <orgname>{product-author}</orgname>\n\
+</authorgroup>\n"
+
+# A list of book titles that still use the old drupal url format (i.e. the book title part includes the product/version),
+# e.g. openshift-enterprise/version-3.0/openshift-enterprise-30-getting-started vs openshift-enterprise/version-3.0/getting-started
+DRUPAL_OLD_URL_TITLES = [
+    "Administrator Guide",
+    "Architecture",
+    "CLI Reference",
+    "Creating Images",
+    "Developer Guide",
+    "Getting Started",
+    "REST API Reference",
+    "Using Images",
+    "What's New?"
+]
+
+# A mapping of upstream book/category names to CP book names
+BOOK_NAME_OVERRIDES = {
+    "Administration": "Administrator Guide"
+}
+
+# Lines that should be stripped out/ignored when cleaning the content
+IGNORE_LINES = [
+    "{product-author}\n",
+    "{product-version}\n",
+    "{product-version]\n",
+    "{Lucas Costi}\n",
+    "toc::[]\n"
+]
+
+# Each MACRO in this list is omitted from the output
+# if the input appears as ':MACRO:' (colon, MACRO, colon).
+IGNORE_MACROS = [
+    "description",
+    "keywords",
+    "icons",
+    "data-uri",
+    "toc",
+    "toc-title"
+]
+
+# Files where the title should be removed when building the all-in-one
+ALL_IN_ONE_SCRAP_TITLE = [
+    "welcome/index.adoc"
+]
+
+# Files that should be commented out in the toc structure
+COMMENT_FILES = [
+    "admin_guide/overview.adoc",
+    "creating_images/overview.adoc",
+    "dev_guide/overview.adoc",
+    "using_images/overview.adoc",
+    "rest_api/overview.adoc"
+]
+
+# Map FILENAME to a map of TITLE to ID. In most cases the ID is the TITLE
+# downcased, with "strange" chars replaced by hyphens. A notable exception
+# is 'any' TITLE.
+TITLE_IDS = {}
+# A dictionary of existing dup ids to new unique ids
+DUPLICATE_IDS = {}
+# Map FILENAME to a map of BAD to GOOD. Most of the time, BAD and GOOD
+# are in link syntax, i.e., beginning with "link:", but not always.
+INCORRECT_LINKS = {}
+
+log = logging.getLogger("build")
+
+
+def setup_parser():
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("--distro", help="The distribution to build for", default="openshift-enterprise")
+    parser.add_argument("--all-in-one", help=argparse.SUPPRESS, action="store_true")
+    parser.add_argument("--title", help=argparse.SUPPRESS, default="Documentation")
+    parser.add_argument("--product", default="OpenShift Enterprise")
+    parser.add_argument("--version", default="3.0")
+    parser.add_argument("--author", default="Red Hat OpenShift Documentation Team")
+    parser.add_argument("--upstream-url", help="The upstream source url", default="https://github.com/openshift/openshift-docs.git")
+    parser.add_argument("--upstream-branch", help="The upstream source branch", default="enterprise-3.0")
+    parser.add_argument("--branch", help="The GitLab branch to commit changes into", default="GA")
+    parser.add_argument("-p", "--push", help="Commit and push the changes into GitLab", action="store_true")
+    parser.add_argument("--no-clean", help="Don't clean the drupal-build directory before building", action="store_true")
+    parser.add_argument("--no-upstream-fetch", help="Don't fetch the upstream sources", action="store_true")
+    return parser
+
+
+def find_build_config_file():
+    """
+    Finds the build config file to use, as it might be _topic_map.yml or _build_cfg.yml.
+    """
+    config = os.path.abspath(os.path.join(CLONE_DIR, "_topic_map.yml"))
+    if not os.path.isfile(config):
+        config = os.path.abspath(os.path.join(CLONE_DIR, "_build_cfg.yml"))
+
+    return config
+
+
+def parse_build_config(config):
+    """
+    Parses the build config and returns a tree based structure for the config.
+    """
+    config = os.path.expanduser(config)
+    with open(config, "r") as f:
+        data = list(yaml.load_all(f))
+
+    for book in data:
+        book_name = book['Name']
+        if book_name in BOOK_NAME_OVERRIDES:
+            book['Name'] = BOOK_NAME_OVERRIDES[book_name]
+
+    return data
+
+
+def iter_tree(node, distro, dir_callback=None, topic_callback=None, include_path=True, parent_dir="", depth=0):
+    """
+    Iterates over a build config tree starting from a specific node, skipping content whose distro doesn't match. Additionally, calls are
+    made to the dir_callback or topic_callback functions when a directory or topic is found.
+    """
+    if "Topics" in node:
+        if check_node_distro_matches(node, distro):
+            if include_path:
+                topics_dir = os.path.join(parent_dir, node["Dir"])
+            else:
+                topics_dir = ""
+
+            if dir_callback is not None:
+                dir_callback(node, parent_dir, depth)
+
+            for topic in node["Topics"]:
+                iter_tree(topic, distro, dir_callback, topic_callback, True, topics_dir, depth + 1)
+    elif check_node_distro_matches(node, distro):
+        if topic_callback is not None:
+            topic_callback(node, parent_dir, depth)
+
+
+def check_node_distro_matches(node, distro):
+    """
+    Checks to see if the specified distro matches a distro in the node's distros list. If no distros list is specified on the
+    node then all distros are allowed, so return True. Distros entries are fnmatch patterns, so e.g. "openshift-*" matches
+    both "openshift-enterprise" and "openshift-origin".
+    """
+    if "Distros" not in node:
+        return True
+    else:
+        node_distros = [x.strip() for x in node['Distros'].split(",")]
+        for node_distro in node_distros:
+            # Check for an exact match, or a glob match
+            if node_distro == distro or fnmatch.fnmatchcase(distro, node_distro):
+                return True
+
+    return False
+
+
+def ensure_directory(directory):
+    """
+    Creates DIRECTORY if it does not exist.
+    """
+    if not os.path.exists(directory):
+        os.mkdir(directory)
+
+
+def build_master_files(info):
+    """
+    Builds the master.adoc and docinfo.xml files for each guide specified in the config.
+    """
+    dest_dir = info['dest_dir']
+
+    all_in_one = info['all_in_one']
+    all_in_one_text = ""
+    for book in info['book_nodes']:
+        book_dest_dir = os.path.join(dest_dir, book['Dir'])
+        ensure_directory(book_dest_dir)
+
+        book_info = dict(info)
+        book_info['title'] = book['Name']
+
+        master = generate_master_entry(book, book['Dir'], info['distro'], all_in_one, all_in_one=all_in_one)
+
+        # Save the content
+        if not all_in_one:
+            master_file = os.path.join(book_dest_dir, 'master.adoc')
+            docinfo_file = os.path.join(book_dest_dir, 'docinfo.xml')
+            master_base = MASTER_FILE_BASE.format(**book_info)
+
+            log.debug("Writing " + master_file)
+            with open(master_file, "w") as f:
+                f.write(master_base + master)
+            log.debug("Writing " + docinfo_file)
+            with open(docinfo_file, "w") as f:
+                f.write(DOCINFO_BASE.format(**book_info))
+        else:
+            if all_in_one_text == "":
+                # Remove the title for the first file in the book
+                master = master.replace("= " + book['Name'] + "\n", "")
+
+                # Set the preface title from the first file in the book
+                first_file = os.path.join(info['src_dir'], book['Dir'], book['Topics'][0]['File'] + ".adoc")
+                preface_title = None
+                with open(first_file, "r") as f:
+                    line = f.readline()
+                    while line:
+                        if include_line(line):
+                            preface_title = re.sub("^=+ ", "", line)
+                            break
+                        line = f.readline()
+                if preface_title is not None:
+                    info['preface-title'] = ":preface-title: " + preface_title
+            all_in_one_text += master
+
+    if all_in_one:
+        master_file = os.path.join(dest_dir, 'master.adoc')
+        docinfo_file = os.path.join(dest_dir, 'docinfo.xml')
+
+        master_base = MASTER_FILE_BASE.format(**info)
+
+        log.debug("Writing " + master_file)
+        with open(master_file, "w") as f:
+            f.write(master_base + all_in_one_text)
+        log.debug("Writing " + docinfo_file)
+        with open(docinfo_file, "w") as f:
+            f.write(DOCINFO_BASE.format(**info))
+
+
+def generate_master_entry(node, book_dir, distro, include_name=True, all_in_one=False):
+    """
+    Generates the master.adoc core content for a specific book/node, i.e. section headings plus
+    "include::<file>[leveloffset=+<depth>]" entries, with COMMENT_FILES entries wrapped in //// comment blocks.
+    """
+    master_entries = []
+
+    def dir_callback(dir_node, parent_dir, depth):
+        if include_name or depth > 0:
+            master_entries.append("=" * (depth + 1) + " " + dir_node["Name"].replace("\\", ""))
+
+    def topic_callback(topic_node, parent_dir, depth):
+        book_file_path = os.path.join(parent_dir, topic_node["File"] + ".adoc")
+        file_path = os.path.join(book_dir, book_file_path)
+        include = "include::" + book_file_path + "[leveloffset=+" + str(depth) + "]"
+        if not all_in_one and file_path in COMMENT_FILES:
+            master_entries.append("////")
+            master_entries.append(include)
+            master_entries.append("////")
+        else:
+            master_entries.append(include)
+        # Add a blank line
+        master_entries.append("")
+
+    # Iterate over the tree and build the master.adoc content
+    iter_tree(node, distro, dir_callback, topic_callback, include_name)
+    return "\n".join(master_entries)
+
+
+def reformat_for_drupal(info):
+    """
+    Reformats the source content for use on the Customer Portal. This function does the following:
+
+    - Copies images over and flattens them into a single dir
+    - Copies the AsciiDoc source over
+    - Filters the AsciiDoc source to remove duplicate macro definitions that should only be in the main file
+    - Adds ids for each file, so the files can be properly cross referenced
+    - Adds ids to sections that are cross referenced but have no id
+    - Fixes duplicate ids in the source content
+    - Fixes links that were done incorrectly and should be cross references instead
+    """
+    books = info['book_nodes']
+    src_dir = info['src_dir']
+    dest_dir = info['dest_dir']
+    distro = info['distro']
+
+    # Build a mapping of files to ids
+    # Note: For all-in-one we have to collect ids from all books first
+    file_to_id_map = {}
+    if info['all_in_one']:
+        book_ids = []
+        for book in books:
+            book_ids.extend(collect_existing_ids(book, distro, src_dir))
+        for book in books:
+            file_to_id_map.update(build_file_to_id_map(book, distro, book_ids, src_dir))
+    else:
+        for book in books:
+            book_ids = collect_existing_ids(book, distro, src_dir)
+            file_to_id_map.update(build_file_to_id_map(book, distro, book_ids, src_dir))
+    info['file_to_id_map'] = file_to_id_map
+
+    # Reformat the data
+    for book in books:
+        log.info("Processing %s", book['Dir'])
+        book_src_dir = os.path.join(src_dir, book['Dir'])
+
+        if info['all_in_one']:
+            images_dir = os.path.join(dest_dir, "images")
+        else:
+            book_dest_dir = os.path.join(dest_dir, book['Dir'])
+            images_dir = os.path.join(book_dest_dir, "images")
+
+        ensure_directory(images_dir)
+
+        log.debug("Copying source files for " + book['Name'])
+        copy_files(book, book_src_dir, src_dir, dest_dir, info)
+
+        log.debug("Copying images for " + book['Name'])
+        copy_images(book, src_dir, images_dir, distro)
+
+
+def copy_images(node, src_path, dest_dir, distro):
+    """
+    Copy images over to the destination directory and flatten all image directories into the one top level dir.
+    Because of the flattening, image file names are assumed to be unique across a book.
+    """
+    def dir_callback(dir_node, parent_dir, depth):
+        node_dir = os.path.join(parent_dir, dir_node['Dir'])
+        src = os.path.join(node_dir, "images")
+
+        if os.path.exists(src):
+            src_files = os.listdir(src)
+            for src_file in src_files:
+                shutil.copy(os.path.join(src, src_file), dest_dir)
+
+    iter_tree(node, distro, dir_callback, parent_dir=src_path)
+
+
+def copy_files(node, book_src_dir, src_dir, dest_dir, info):
+    """
+    Recursively copy files from the source directory to the destination directory, making sure to scrub the content, add ids where the
+    content is referenced elsewhere and fix any links that should be cross references.
+    """
+    def dir_callback(dir_node, parent_dir, depth):
+        node_dest_dir = os.path.join(dest_dir, parent_dir, dir_node['Dir'])
+        ensure_directory(node_dest_dir)
+
+    def topic_callback(topic_node, parent_dir, depth):
+        node_src_dir = os.path.join(src_dir, parent_dir)
+        node_dest_dir = os.path.join(dest_dir, parent_dir)
+
+        src_file = os.path.join(node_src_dir, topic_node["File"] + ".adoc")
+        dest_file = os.path.join(node_dest_dir, topic_node["File"] + ".adoc")
+
+        # Copy the file
+        copy_file(info, book_src_dir, src_file, dest_dir, dest_file)
+
+    iter_tree(node, info['distro'], dir_callback, topic_callback)
+
+
+def copy_file(info, book_src_dir, src_file, dest_dir, dest_file, include_check=True, tag=None, cwd=None):
+    """
+    Copies a source file to its destination, making sure to scrub the content, add ids where the content is referenced elsewhere and fix
+    any links that should be cross references. Also copies any includes that are referenced, since they aren't included in _build_cfg.yml.
+    """
+    # It's possible that the file might have been created by another include, if so then just return
+    if os.path.isfile(dest_file):
+        return
+
+    # Touch the dest file, so we can handle circular includes
+    parent_dir = os.path.dirname(dest_file)
+    if not os.path.exists(parent_dir):
+        os.makedirs(parent_dir)
+    # os.mknod(dest_file)
+    open(dest_file, 'w').close()
+
+    # Scrub/fix the content
+    content = scrub_file(info, book_src_dir, src_file, tag=tag, cwd=cwd)
+
+    # Check for any includes
+    if include_check:
+        cleaned_content = remove_conditional_content(content, info)
+        include_iter = INCLUDE_RE.finditer(cleaned_content)
+        for include in include_iter:
+            include_text = include.group(0)
+            include_path = include.group(1)
+            include_unparsed_vars = include.group(2)
+
+            # Determine the include vars
+            include_vars = {}
+            if include_unparsed_vars is not None and len(include_unparsed_vars) > 0:
+                for meta in re.split(r"\s*,\s*", include_unparsed_vars):
+                    key, value = re.split("\s*=\s*", meta, 2)
+                    include_vars[key] = value
+
+            # Determine the include src/dest paths
+            include_file = os.path.join(os.path.dirname(book_src_dir), include_path)
+            relative_path = os.path.relpath(include_file, os.path.dirname(src_file))
+
+            # If the path is in another book, copy it into this one
+            relative_book_path = os.path.relpath(include_file, book_src_dir)
+            if relative_book_path.startswith("../"):
+                path, src_book_name = os.path.split(book_src_dir)
+                dest_include_dir = os.path.join(dest_dir, src_book_name, "includes")
+                relative_path = os.path.join(os.path.relpath(dest_include_dir, parent_dir), os.path.basename(include_file))
+            else:
+                dest_include_dir = os.path.abspath(os.path.join(os.path.dirname(dest_file), os.path.dirname(relative_path)))
+            dest_include_file = os.path.join(dest_include_dir, os.path.basename(include_file))
+
+            # Make sure we have a reference to the current working dir
+            current_dir = cwd or os.path.dirname(src_file)
+            include_tag = include_vars.get("tag", None)
+
+            # Copy the file and fix the content
+            if not os.path.isfile(dest_include_file):
+                copy_file(info, book_src_dir, include_file, dest_dir, dest_include_file, tag=include_tag, cwd=current_dir)
+            else:
+                # The file has already been copied, so just fix the links for this tag
+                with open(dest_include_file, 'r') as f:
+                    include_content = f.read()
+
+                # Fix any links
+                include_content = fix_links(include_content, info, book_src_dir, include_file, tag=include_tag, cwd=cwd)
+
+                with open(dest_include_file, "w") as f:
+                    f.write(include_content)
+
+            content = content.replace(include_text, include.expand("include::" + relative_path + "[\\2]"))
+
+    with open(dest_file, "w") as f:
+        f.write(content)
+
+
+def scrub_file(info, book_src_dir, src_file, tag=None, cwd=None):
+    """
+    Scrubs a file and returns the cleaned file contents.
+    """
+    base_src_file = src_file.replace(info['src_dir'] + "/", "")
+
+    # Get a list of predefined custom title ids for the file
+    title_ids = TITLE_IDS.get(base_src_file, {})
+
+    # Read in the source content
+    with open(src_file, 'r') as f:
+        src_file_content = f.readlines()
+
+    # Scrub the content
+    content = ""
+    header_found = content_found = False
+    current_id = None
+    for line in src_file_content:
+        # Ignore any leading blank lines, before any meaningful content is found
+        if line.strip() == "" and not content_found:
+            continue
+
+        # Check if the line should be included in the output
+        if include_line(line):
+            content_found = True
+
+            # Setup the document header content/id
+            if not header_found and line.strip() != "" and line.startswith("="):
+                header_found = True
+
+                if info['all_in_one'] and base_src_file in ALL_IN_ONE_SCRAP_TITLE and line.startswith("= "):
+                    continue
+                # Add a section id if one doesn't exist, so we have something to link to
+                elif current_id is None and src_file in info['file_to_id_map']:
+                    file_id = info['file_to_id_map'][src_file]
+                    content += "[[" + file_id + "]]\n"
+            # Add a custom title id, if one is needed
+            elif line.startswith("=") and current_id is None:
+                for title in title_ids:
+                    title_re = r"^=+ " + title.replace(".", "\\.").replace("?", "\\?") + "( (anchor|\[).*?)?(\n)?$"
+                    if re.match(title_re, line):
+                        content += "[[" + title_ids[title] + "]]\n"
+
+            # Set the current id based on the line content
+            if current_id is None and ID_RE.match(line.strip()):
+                current_id = line.strip()
+            elif current_id is not None and line.strip() != "":
+                current_id = None
+
+            # Add the line to the processed content
+            content += line
+
+    # Fix up any duplicate ids
+    if base_src_file in DUPLICATE_IDS:
+        for duplicate_id, new_id in DUPLICATE_IDS[base_src_file].items():
+            content = content.replace("[[" + duplicate_id + "]]", "[[" + new_id + "]]")
+
+    # Replace incorrect links with correct ones
+    if base_src_file in INCORRECT_LINKS:
+        for incorrect_link, fixed_link in INCORRECT_LINKS[base_src_file].items():
+            content = content.replace(incorrect_link, fixed_link)
+
+    # Fix up the links
+    content = fix_links(content, info, book_src_dir, src_file, tag=tag, cwd=cwd)
+
+    return content
+
+
+def include_line(line):
+    """
+    Determines if a line should be included in the filtered output.
+    """
+    if line in IGNORE_LINES:
+        return False
+
+    for macro in IGNORE_MACROS:
+        if line.startswith(":" + macro + ":"):
+            return False
+
+    return True
+
+
+def fix_links(content, info, book_src_dir, src_file, tag=None, cwd=None):
+    """
+    Fix any links that were done incorrectly and reference the output instead of the source content.
+    """
+    if info['all_in_one']:
+        content = _fix_links(content, info['src_dir'], src_file, info)
+    else:
+        # Determine if the tag should be passed when fixing the links. If it's in the same book, then process the entire file. If it's
+        # outside the book then don't process it.
+        if book_src_dir in src_file:
+            content = _fix_links(content, book_src_dir, src_file, info, cwd=cwd)
+        else:
+            content = _fix_links(content, book_src_dir, src_file, info, tag=tag, cwd=cwd)
+
+    return content
+
+
+def _fix_links(content, book_dir, src_file, info, tag=None, cwd=None):
+    """
+    Fix any links that were done incorrectly and reference the output instead of the source content.
+    """
+    # TODO: Deal with xrefs so that they keep the proper path. Atm it'll just strip the path and leave only the id.
+    global has_errors
+    file_to_id_map = info['file_to_id_map']
+    current_dir = cwd or os.path.dirname(src_file)
+    cleaned_content = remove_conditional_content(content, info, tag=tag)
+    links = LINKS_RE.finditer(cleaned_content)
+
+    for link in links:
+        link_text = link.group(0)
+        link_file = link.group(1)
+        link_anchor = link.group(2)
+        link_title = link.group(3)
+
+        if link_file is not None:
+            fixed_link_file = link_file.replace(".html", ".adoc")
+            fixed_link_file_abs = os.path.abspath(os.path.join(current_dir, fixed_link_file))
+            if fixed_link_file_abs in file_to_id_map:
+                # We are dealing with a cross reference to another book here
+                external_link = EXTERNAL_LINK_RE.search(link_file)
+                book_dir_name = external_link.group(1)
+
+                # Find the book name
+                book_name = book_dir_name
+                for book in info['data']:
+                    if check_node_distro_matches(book, info['distro']) and book['Dir'] == book_dir_name:
+                        book_name = book['Name']
+                        break
+
+                fixed_link_file = BASE_PORTAL_URL + build_portal_url(info, book_name)
+
+                if link_anchor is None:
+                    fixed_link = "link:" + fixed_link_file + "#" + file_to_id_map[fixed_link_file_abs] + link_title
+                else:
+                    fixed_link = "link:" + fixed_link_file + link_anchor + link_title
+            else:
+                # Cross reference or link that isn't in the docs suite
+                fixed_link = link_text
+                if EXTERNAL_LINK_RE.search(link_file) is not None:
+                    rel_src_file = src_file.replace(os.path.dirname(book_dir) + "/", "")
+                    has_errors = True
+                    log.error("ERROR (%s): \"%s\" appears to try to reference a file not included in the \"%s\" distro", rel_src_file, link_text.replace("\n", ""), info['distro'])
+        else:
+            fixed_link = "xref:" + link_anchor.replace("#", "") + link_title
+
+        content = content.replace(link_text, fixed_link)
+
+    return content
+
+
+def remove_conditional_content(content, info, tag=None):
+    """
+    Removes any conditional content that doesn't match for the specified distro.
+    """
+    # Remove any ifdef content
+    ifdef = IFDEF_RE.search(content)
+    while ifdef is not None:
+        is_not_def = ifdef.group(1) == "n"
+        ifdef_distros = ifdef.group(2).split(",")
+        pos = ifdef.start()
+        end = ifdef.end()
+
+        # Determine if we should strip the conditional content, based on the distro
+        strip_content = False
+        if is_not_def and info['distro'] in ifdef_distros:
+            strip_content = True
+        elif not is_not_def and info['distro'] not in ifdef_distros:
+            strip_content = True
+
+        # Remove the conditional content
+        if strip_content:
+            # Find the correct endif for the current ifdef
+            search_pos = end
+            endpos = len(content)
+            while True:
+                next_ifdef = IFDEF_RE.search(content, search_pos)
+                endif = ENDIF_RE.search(content, search_pos)
+
+                if not endif:
+                    break
+                elif not next_ifdef or next_ifdef.start() > endif.start():
+                    endpos = endif.end()
+                    break
+                else:
+                    search_pos = endif.end()
+
+            # Replace the content and move the end pos to be the same as the start, since the content was removed
+            ifdef_text = content[pos:endpos]
+            content = content.replace(ifdef_text, "")
+            end = pos
+
+        # Move onto the next ifdef
+        ifdef = IFDEF_RE.search(content, end)
+
+    # Remove commented out content
+    for comment in COMMENT_CONTENT_RE.finditer(content):
+        content = content.replace(comment.group(0), "")
+
+    # Remove content outside of tags
+    if tag is not None:
+        for tag_match in TAG_CONTENT_RE.finditer(content):
+            tag_text = tag_match.group(0)
+            tag_label = tag_match.group(1)
+            if tag_label == tag:
+                # Tag matches, so only use the content in the tag
+                content = tag_text
+
+    return content
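+
+# Conditional content example: when building the "openshift-enterprise"
+# distro, an "ifdef::openshift-origin[]" ... "endif::[]" span is stripped by
+# remove_conditional_content(), while "ifndef::openshift-origin[]" content
+# is kept.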
+
+
+def collect_existing_ids(node, distro, path):
+    """
+    Examines the AsciiDoc file contents of all of a node's topics and returns any existing ids.
+    """
+    book_ids = []
+
+    def topic_callback(topic_node, parent_dir, depth):
+        src_file = os.path.join(parent_dir, topic_node["File"] + ".adoc")
+        file_ids = extract_file_ids(src_file)
+        book_ids.extend(file_ids)
+
+    iter_tree(node, distro, topic_callback=topic_callback, parent_dir=path)
+
+    return book_ids
+
+
+def build_file_to_id_map(node, distro, existing_ids, path=""):
+    """
+    Builds a mapping of file names/paths to the root id for the file. This is used to fix links that were done incorrectly.
+    """
+    file_to_id_map = {}
+
+    def topic_callback(topic_node, parent_dir, depth):
+        src_file = os.path.join(parent_dir, topic_node["File"] + ".adoc")
+        file_to_id_map[src_file] = build_file_id(topic_node["Name"], file_to_id_map, existing_ids)
+
+    iter_tree(node, distro, topic_callback=topic_callback, parent_dir=path)
+    return file_to_id_map
+
+
+def extract_file_ids(file_path):
+    """
+    Extracts all the ids used in the specified file.
+    """
+    with open(file_path, "r") as f:
+        content = f.read()
+
+    ids = ID_RE.finditer(content)
+    return [id.group(1) for id in ids]
+
+
+def build_file_id(file_title, file_to_id_map, existing_ids):
+    """
+    Generates a unique id for a file, based on its title. A numeric suffix is appended until the id is unique.
+    """
+    file_id = base_id = re.sub(r"[\[\]\(\)#]", "", file_title.lower().replace("_", "-").replace(" ", "-"))
+    count = 1
+    while file_id in existing_ids or file_id in file_to_id_map.values():
+        file_id = base_id + "-" + str(count)
+        count += 1
+
+    return file_id
+
+
+def build_portal_url(info, book_name):
+    """
+    Builds a portal url path by escaping the content in the same way drupal does. For example, product
+    "OpenShift Container Platform", version "4.3" and book "Getting Started" yield
+    "openshift_container_platform/4.3/html-single/getting_started/".
+    """
+    product = info['product']
+    version = info['product-version']
+
+    return generate_url_from_name(product) + "/" + generate_url_from_name(version) + "/html-single/" + generate_url_from_name(book_name) + "/"
+
+
+def replace_nbsp(val):
+    """Replaces non breaking spaces with a regular space"""
+    if val is not None:
+        # Check if the string is unicode
+        if isinstance(val, unicode):
+            return val.replace(u'\xa0', ' ')
+        else:
+            return val.replace('\xc2\xa0', ' ')
+    else:
+        return None
+
+
+def generate_url_from_name(name, delimiter='_'):
+    """
+    Generates a url fragment from a product, version or title name.
+    """
+    # Remove characters that aren't allowed in urls
+    url = re.sub("^\.+|[^0-9a-zA-Z _\-.]+", "", replace_nbsp(name))
+    # Replace spaces with the delimiter
+    url = re.sub("\s+", delimiter, url)
+    # Replace multiple underscores with a single underscore
+    url = re.sub(delimiter + "+", delimiter, url)
+    return url.lower()
+
+
+def call_git_command(*args, **kwargs):
+    """
+    Calls a git command and retries the command if it is unable to connect to the remote repo.
+    """
+    retries = kwargs.pop("retries", 3)
+    try:
+        output = subprocess.check_output(*args, **kwargs)
+        if output is not None:
+            sys.stdout.write(output)
+        return output
+    except subprocess.CalledProcessError as e:
+        retries -= 1
+        if retries > 0 and "fatal: Could not read from remote repository" in e.output:
+            # Connection failed, so wait a couple of secs and try again
+            time.sleep(2)
+            call_git_command(*args, retries=retries, **kwargs)
+        else:
+            raise
+
+
+def fetch_sources(url, branch, dir=None, clone_dirname=None):
+    """
+    Fetches sources from a git repository. If the repository hasn't been cloned yet it is cloned into `clone_dirname`,
+    otherwise the existing clone is just updated.
+    """
+    # Setup the defaults
+    if dir is None:
+        dir = os.getcwd()
+    if clone_dirname is None:
+        clone_dirname = url.split('/')[-1].replace(".git", "")
+
+    # If the dir already exists update the content, otherwise clone it
+    clone_dir = os.path.abspath(os.path.join(dir, clone_dirname))
+    if os.path.exists(os.path.join(clone_dir, ".git")):
+        cmd = ["git", "pull", "-f"]
+        cmd_dir = clone_dir
+
+        # Do a checkout to make sure we are on the right branch
+        checkout_cmd = ["git", "checkout", branch]
+        subprocess.check_output(checkout_cmd, cwd=cmd_dir, stderr=subprocess.STDOUT)
+    else:
+        cmd = ["git", "clone", "-b", branch, url, clone_dirname]
+        cmd_dir = os.path.abspath(dir)
+
+    # Execute the command
+    call_git_command(cmd, cwd=cmd_dir, stderr=subprocess.STDOUT)
+
+
+def sync_directories(src_dir, dest_dir, ignore=None):
+    """
+    Syncs two directories so that they both contain the same content, with the exception of ignored files.
+    """
+    if ignore is None:
+        ignore = []
+    ignore.extend(CMP_IGNORE_FILES)
+
+    dcmp = filecmp.dircmp(src_dir, dest_dir, ignore)
+    _sync_directories_dircmp(dcmp)
+
+
+def _sync_directories_dircmp(dcmp):
+    # Remove files that only exist in the dest directory
+    for filename in dcmp.right_only:
+        right = os.path.join(dcmp.right, filename)
+        if os.path.isfile(right):
+            os.remove(right)
+        else:
+            shutil.rmtree(right)
+
+    # Copy files that only exist in the source directory or files that have changed
+    for filename in dcmp.left_only + dcmp.common_files:
+        left = os.path.join(dcmp.left, filename)
+        right = os.path.join(dcmp.right, filename)
+        if os.path.isfile(left):
+            shutil.copy2(left, right)
+        else:
+            shutil.copytree(left, right)
+
+    # Sync sub directories
+    for subdcmp in dcmp.subdirs.values():
+        _sync_directories_dircmp(subdcmp)
+
+
+def commit_and_push_changes(git_dir, git_branch, git_upstream_branch):
+    """
+    Adds, commits and pushes any changes to a local git repository.
+    """
+    # Add all the changes
+    add_cmd = ["git", "add", "--all"]
+    subprocess.check_call(add_cmd, cwd=git_dir)
+
+    try:
+        # Commit the changes
+        commit_cmd = ["git", "commit", "-m", "Merge branch 'upstream/" + git_upstream_branch + "' into " + git_branch,
+                      "--author", "CCS OSE Build Script "]
+        call_git_command(commit_cmd, cwd=git_dir, stderr=subprocess.STDOUT)
+
+        # Push the changes
+        push_cmd = ["git", "push"]
+        call_git_command(push_cmd, cwd=git_dir, stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as e:
+        if e.output is None or "nothing to commit" not in e.output:
+            raise
+
+
+def parse_repo_config(config_file, distro, version):
+    # Make sure the repo config file exists
+    if not os.path.isfile(config_file):
+        log.error("Failed loading the repo configuration from %s", config_file)
+        sys.exit(-1)
+
+    parser = ConfigParser.SafeConfigParser()
+    parser.read(config_file)
+
+    repo_urls = dict()
+    section_name = distro + "-" + version
+    if parser.has_section(section_name):
+        for (key, value) in parser.items(section_name):
+            repo_urls[key] = value
+
+    return repo_urls
+
+
+def main():
+    parser = setup_parser()
+    args = parser.parse_args()
+    logging.basicConfig(format='%(message)s', level=logging.INFO, stream=sys.stdout)
+
+    # Copy down the latest files
+    if not args.no_upstream_fetch:
+        log.info("Fetching the upstream sources")
+        fetch_sources(args.upstream_url, args.upstream_branch, clone_dirname=CLONE_DIR)
+
+    config = find_build_config_file()
+    src_dir = os.path.dirname(config)
+
+    # Parse the build config
+    data = parse_build_config(config)
+
+    # Filter the list of books that should be built
+    book_nodes = [node for node in data if check_node_distro_matches(node, args.distro)]
+
+    # Make the new source tree
+    dest_dir = os.path.join(os.getcwd(), "drupal-build", args.distro)
+    if not args.no_clean:
+        log.info("Cleaning the drupal-build directory")
+        if os.path.exists(dest_dir):
+            shutil.rmtree(dest_dir)
+        os.makedirs(dest_dir)
+    elif not os.path.exists(dest_dir):
+        os.makedirs(dest_dir)
+
+    info = {
+        'title': args.title,
+        'product-author': args.author,
+        'product-version': args.version,
+        'product': args.product,
+        'distro': args.distro,
+        'src_dir': src_dir,
+        'dest_dir': dest_dir,
+        'data': data,
+        'book_nodes': book_nodes,
+        'all_in_one': args.all_in_one,
+        'preface-title': "",
+        "upstream_branch": args.upstream_branch
+    }
+
+    # Build the master files
+    log.info("Building the drupal files")
+    build_master_files(info)
+
+    # Copy the original data and reformat for drupal
+    reformat_for_drupal(info)
+
+    if has_errors:
+        sys.exit(1)
+
+    if args.push:
+        # Parse the repo urls
+        config_file = os.path.join(os.path.dirname(__file__), 'repos.ini')
+        repo_urls = parse_repo_config(config_file, args.distro, args.version)
+
+        # Make sure the base git dir exists
+        base_git_dir = os.path.join(os.getcwd(), "gitlab-repos")
+        ensure_directory(base_git_dir)
+
+        # Checkout the gitlab repo, copy the changes and push them back up
+        for book_dir, gitlab_repo_url in repo_urls.items():
+            build_book_dir = os.path.join(dest_dir, book_dir)
+            git_dirname = gitlab_repo_url.split('/')[-1].replace(".git", "")
+            git_dir = os.path.join(base_git_dir, git_dirname)
+
+            try:
+                log.info("Fetching " + book_dir + " sources from GitLab")
+                fetch_sources(gitlab_repo_url, args.branch, base_git_dir, git_dirname)
+
+                log.info("Syncing " + book_dir)
+                sync_directories(build_book_dir, git_dir, ["docinfo.xml"])
+
+                log.info("Pushing " + book_dir + " changes back to GitLab")
+                commit_and_push_changes(git_dir, args.branch, args.upstream_branch)
+            except subprocess.CalledProcessError as e:
+                if e.output:
+                    sys.stdout.write(e.output)
+                raise
+
+
+if __name__ == "__main__":
+    main()
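+
+# Example invocation (hypothetical values):
+#   python build_for_portal.py --distro openshift-enterprise \
+#       --product "OpenShift Container Platform" --version 4.3 --push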