diff --git a/examples/benchmarks/bert/src/bert_layers.py b/examples/benchmarks/bert/src/bert_layers.py
index b2c5ff267..085c16c0e 100644
--- a/examples/benchmarks/bert/src/bert_layers.py
+++ b/examples/benchmarks/bert/src/bert_layers.py
@@ -63,8 +63,8 @@ IMPL_USE_FLASH2 = False
 
 # Import Flash Attention 2, which supports ALiBi https://github.com/Dao-AILab/flash-attention
 try:
-    from flash_attn import flash_attn_qkvpacked_func # type: ignore
-    installed_version = importlib.metadata.version('flash_attn') # type: ignore
+    from flash_attn import flash_attn_qkvpacked_func  # type: ignore
+    installed_version = importlib.metadata.version('flash_attn')  # type: ignore
     if installed_version < '2.4.2':
         raise ImportError('newer version of flash_attn required (>= 2.4.2)')
     IMPL_USE_FLASH2 = True
@@ -278,7 +278,7 @@ def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor,
 
                     attention = flash_attn_qkvpacked_func(
                         qkv, dropout_p=self.p_dropout, alibi_slopes=slopes)
-                    attention = attention.to(orig_dtype) # type: ignore
+                    attention = attention.to(orig_dtype)  # type: ignore
                     bias = bias.to(bias_dtype)
                 else:
                     attention = flash_attn_qkvpacked_func(
@@ -302,14 +302,14 @@ def forward(self, hidden_states: torch.Tensor, cu_seqlens: torch.Tensor,
                     bias_dtype = bias.dtype
                     bias = bias.to(half)
                     attention = flash_attn_qkvpacked_func(qkv, bias)
-                    attention = attention.to(orig_dtype) # type: ignore
+                    attention = attention.to(orig_dtype)  # type: ignore
                     bias = bias.to(bias_dtype)
                 else:
                     attention = flash_attn_qkvpacked_func(qkv, bias)
 
         # attn_mask is 1 for attend and 0 for don't attend
         attention = bert_padding_module.unpad_input_only(
-            attention, # type: ignore
+            attention,  # type: ignore
             torch.squeeze(attn_mask) == 1)
         return rearrange(attention, 'nnz h d -> nnz (h d)')
 
diff --git a/examples/end-to-end-examples/support_chatbot/__init__.py b/examples/end-to-end-examples/support_chatbot/__init__.py
index 642d7387a..cb0d03b22 100644
--- a/examples/end-to-end-examples/support_chatbot/__init__.py
+++ b/examples/end-to-end-examples/support_chatbot/__init__.py
@@ -1,3 +1,2 @@
 # Copyright 2022 MosaicML Examples authors
 # SPDX-License-Identifier: Apache-2.0
-
diff --git a/examples/end-to-end-examples/support_chatbot/mcli_yamls/conversion/convert_txt_to_stream.yaml b/examples/end-to-end-examples/support_chatbot/mcli_yamls/conversion/convert_txt_to_stream.yaml
index 506952be1..f2d63083e 100644
--- a/examples/end-to-end-examples/support_chatbot/mcli_yamls/conversion/convert_txt_to_stream.yaml
+++ b/examples/end-to-end-examples/support_chatbot/mcli_yamls/conversion/convert_txt_to_stream.yaml
@@ -11,7 +11,7 @@ integrations:
 # Clone the examples repository so that we have access to the code in sec_10k_qa
 - integration_type: git_repo
   git_repo: YOUR_GITHUB_USERNAME/examples
-  #git_branch: support-bot
+  # git_branch: support-bot
   ssh_clone: false # Should be true if using a private repo
   path: /workspace/examples # Tell MCLI what path to clone the repo to
 
diff --git a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_composer_codebase.yaml b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_composer_codebase.yaml
index 526dcc9cc..6644a8aaa 100644
--- a/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_composer_codebase.yaml
+++ b/examples/end-to-end-examples/support_chatbot/mcli_yamls/finetune/finetune_composer_codebase.yaml
@@ -5,7 +5,7 @@ compute:
 
   ## These configurations are optional
   # cluster: r0z0 # Name of the cluster to use for this run
-  #gpu_type: h100_80gb # Type of GPU to use.
+  # gpu_type: h100_80gb # Type of GPU to use.
 
 integrations:
 # Clone and install the llm-foundry repo so we can run scripts from it
@@ -25,7 +25,7 @@ command: |
   cd llm-foundry/scripts
   composer train/train.py /mnt/config/parameters.yaml || (echo "Command failed - killing python" && pkill python && exit 1)
 
-#image: "mosaicml/llm-foundry:2.0.1_cu118-latest"
+# image: "mosaicml/llm-foundry:2.0.1_cu118-latest"
 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
 
 # The parameters section is mounted to /mnt/config/parameters.yaml in the container
@@ -60,7 +60,7 @@ parameters:
   model:
     name: hf_causal_lm
     init_device: mixed # Initially only create the model on CPU once per node to reduce system memory requirements
-    pretrained_model_name_or_path: mosaicml/mpt-30b-chat # This can be changed to other models from the HuggingFace model hub
+    pretrained_model_name_or_path: mosaicml/mpt-30b-chat # This can be changed to other HF models
    pretrained: true # If false, will just load the model architecture and randomly initialize the weights
     config_overrides: # Override the default model config (comment this out if you change the model from MPT)
       attn_config:
diff --git a/examples/end-to-end-examples/support_chatbot/repo_downloader.py b/examples/end-to-end-examples/support_chatbot/repo_downloader.py
index 85d9cd686..889781235 100644
--- a/examples/end-to-end-examples/support_chatbot/repo_downloader.py
+++ b/examples/end-to-end-examples/support_chatbot/repo_downloader.py
@@ -91,7 +91,9 @@ def prepare_output_file(self, file_path: str) -> str:
         return output_file
 
     def file_to_txt(self, file_path: (str)) -> None:
-        """Given the file_path of a file in cloned repository, downloads it to a
+        """Given the file_path of a file in cloned repository, downloads it to
+        a.
+
         .txt file and saves it in the same directory structure in.
 
         /scripts/train/support_chatbot/retrieval_data/{self.repo_name}