Update on "Bump ExecuTorch's PyTorch nightly pin to dev20241121"
Require at least the 11/18 nightly to unblock #7040.

Differential Revision: [D66398425](https://our.internmc.facebook.com/intern/diff/D66398425/)

[ghstack-poisoned]
swolchok committed Nov 26, 2024
2 parents cdf0625 + 8c15b6c commit aaa7768
Showing 52 changed files with 3,430 additions and 132 deletions.
11 changes: 11 additions & 0 deletions .ci/scripts/test_llama.sh
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
shift 2
;;
-pt2e_quantize)
PT2E_QUANTIZE="$2"
shift 2
;;
-upload)
UPLOAD_DIR="$2"
shift 2
@@ -44,6 +48,9 @@ MODE=${MODE:-"xnnpack+custom"}
# Default UPLOAD_DIR to empty string if not set
UPLOAD_DIR="${UPLOAD_DIR:-}"

# Default PT2E_QUANTIZE to empty string if not set
PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"

if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
echo "Expecting atleast 4 positional arguments"
echo "Usage: [...]"
@@ -234,6 +241,10 @@ if [[ "${COREML}" == "ON" ]]; then
fi
if [[ "${QNN}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
fi
fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
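For context, a minimal Python sketch mirroring the shell logic above: how the new -pt2e_quantize value expands the QNN export arguments. The function name is hypothetical, for illustration only.

# Hypothetical mirror of the PT2E_QUANTIZE handling in test_llama.sh above.
def qnn_export_args(pt2e_quantize: str = "") -> str:
    """Build the extra export_llama flags used in QNN mode."""
    args = "-kv -v --qnn --disable_dynamic_shape"
    if pt2e_quantize == "qnn_16a16w":
        # 16-bit activation / 16-bit weight quantization adds calibration flags.
        args += (
            " --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w"
            " --calibration_tasks wikitext --calibration_limit 1"
            " --calibration_seq_length 128 --calibration_data Once"
        )
    return args

print(qnn_export_args("qnn_16a16w"))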
1 change: 1 addition & 0 deletions .github/workflows/build-wheels-linux.yml
@@ -27,6 +27,7 @@ jobs:
test-infra-ref: main
with-cuda: disabled
with-rocm: disabled
python-versions: '["3.10", "3.11", "3.12"]'

build:
needs: generate-matrix
1 change: 1 addition & 0 deletions .github/workflows/build-wheels-m1.yml
@@ -27,6 +27,7 @@ jobs:
test-infra-ref: main
with-cuda: disabled
with-rocm: disabled
python-versions: '["3.10", "3.11", "3.12"]'

build:
needs: generate-matrix
4 changes: 3 additions & 1 deletion .github/workflows/pull.yml
@@ -368,6 +368,7 @@ jobs:
strategy:
matrix:
dtype: [fp32]
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
mode: [qnn]
fail-fast: false
with:
@@ -384,6 +385,7 @@
DTYPE=${{ matrix.dtype }}
BUILD_TOOL="cmake"
MODE=${{ matrix.mode }}
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
@@ -393,7 +395,7 @@
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
test-phi-3-mini-runner-linux:
name: test-phi-3-mini-runner-linux
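The new pt2e_quantize axis keeps this matrix small (the same pattern appears in the new trunk.yml job below). A hypothetical Python sketch of the combinations it expands to:

# Hypothetical sketch: enumerate the jobs the CI matrix above generates.
from itertools import product

dtypes = ["fp32"]
pt2e_quantizes = ["qnn_16a16w", "qnn_8a8w"]
modes = ["qnn"]

for dtype, pt2e_quantize, mode in product(dtypes, pt2e_quantizes, modes):
    print(
        f".ci/scripts/test_llama.sh -model stories110M -build_tool cmake"
        f" -mode {mode} -dtype {dtype} -pt2e_quantize {pt2e_quantize}"
    )
# Yields two jobs, one per pt2e_quantize value.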
36 changes: 36 additions & 0 deletions .github/workflows/trunk.yml
@@ -441,3 +441,39 @@ jobs:
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
echo "::endgroup::"
test-llama-runner-qnn-linux:
name: test-llama-runner-qnn-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
dtype: [fp32]
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
mode: [qnn]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
BUILD_TOOL="cmake"
DTYPE=${{ matrix.dtype }}
MODE=${{ matrix.mode }}
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
3 changes: 3 additions & 0 deletions .gitmodules
@@ -64,6 +64,9 @@
[submodule "third-party/pybind11"]
path = third-party/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3"]
path = backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3
url = https://github.com/foss-xtensa/nnlib-FusionG3/
[submodule "third-party/ao"]
path = third-party/ao
url = https://github.com/pytorch/ao.git
2 changes: 1 addition & 1 deletion backends/apple/coreml/scripts/install_requirements.sh
@@ -24,7 +24,7 @@ rm -rf "$COREML_DIR_PATH/third-party"
mkdir "$COREML_DIR_PATH/third-party"

echo "${green}ExecuTorch: Cloning coremltools."
git clone --depth 1 --branch 8.0 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
git clone --depth 1 --branch 8.1 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
cd $COREMLTOOLS_DIR_PATH

STATUS=$?
18 changes: 5 additions & 13 deletions backends/apple/coreml/test/test_coreml_partitioner.py
@@ -71,23 +71,15 @@ def test_vit_skip_conv(self):
)
)

conv_block = ["aten.convolution.default", "executorch_call_delegate"]
safe_softmax_block = [
"getitem",
"getitem",
"getitem",
"getitem",
"aten.any.dim",
"executorch_call_delegate",
]
final_block = ["getitem"]
total = conv_block + 12 * safe_softmax_block + final_block

assert [
node.target.__name__
for node in delegated_program_manager.exported_program().graph.nodes
if node.op == "call_function"
] == total
] == [
"aten.convolution.default",
"executorch_call_delegate",
"getitem",
]

def test_buffer(self):
embedding_dim = 3
33 changes: 30 additions & 3 deletions backends/arm/arm_backend.py
@@ -52,6 +52,7 @@ def __init__(self):
self.permute_nhwc = False
self.quantize_io = False
self.tosa_version = None
self.input_order = None

def ethosu_compile_spec(
self,
@@ -89,7 +90,7 @@ def ethosu_compile_spec(
self.compiler_flags.append(extra_flags)

base_tosa_version = "TOSA-0.80.0+BI"
if "U55" in config:
if "u55" in config:
# Add the Ethos-U55 extension marker
base_tosa_version += "+u55"
self.tosa_version = TosaSpecification.create_from_string(base_tosa_version)
@@ -134,6 +135,14 @@ def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
self.quantize_io = quantize_io
return self

def set_input_order(self, input_order: str = None) -> "ArmCompileSpecBuilder":
"""
Reorder the inputs coming in. This may be required when inputs > 1.
And while using the U55/U85 CompileSpec.
"""
self.input_order = input_order
return self

def build(self) -> List[CompileSpec]:
"""
Generate a list of compile spec objects from the builder
@@ -163,6 +172,13 @@ def build(self) -> List[CompileSpec]:
CompileSpec("permute_memory_format", "nhwc".encode())
)

if self.input_order:
self.compile_spec.append(
CompileSpec(
"input_order", " ".join(map(str, self.input_order)).encode()
)
)

if self.quantize_io:
self.compile_spec.append(CompileSpec("quantize_io", "True".encode()))

@@ -214,13 +230,16 @@ def preprocess( # noqa: C901
artifact_path = None
output_format = ""
compile_flags = []
input_order = []
for spec in compile_spec:
if spec.key == "debug_artifact_path":
artifact_path = spec.value.decode()
if spec.key == "output_format":
output_format = spec.value.decode()
if spec.key == "compile_flags":
compile_flags.append(spec.value.decode())
if spec.key == "input_order":
input_order = list(map(int, spec.value.decode().split(",")))

# Check that the output format is set in the compile spec
if not output_format:
@@ -246,19 +265,27 @@
)

node_visitors = get_node_visitors(edge_program, tosa_spec)

input_count = 0
for node in graph_module.graph.nodes:
if node.op == "call_function":
process_call_function(node, tosa_graph, node_visitors, tosa_spec)
elif node.op == "placeholder":
process_placeholder(node, tosa_graph, edge_program, tosa_spec)
if node.name in edge_program.graph_signature.user_inputs:
input_count += 1
elif node.op == "output":
process_output(node, tosa_graph)
else:
# This will only happen if an unpartitioned graph is passed without
# any checking of compatibility.
dbg_fail(node, tosa_graph, artifact_path)

if len(input_order) > 0:
if input_count != len(input_order):
raise RuntimeError(
"The rank of the input order is not equal to amount of input tensors"
)

# TODO: It would be awesome if this dump could somehow be done on top level and not here.
# Problem is that the desc.json has to be created on the tosa_graph object, which we can't
# access from top level.
@@ -275,7 +302,7 @@ def preprocess( # noqa: C901
# preprocess and some consume TOSA fb directly.
if output_format == "vela":
# Emit vela_bin_stream format
binary = vela_compile(tosa_graph, compile_flags)
binary = vela_compile(tosa_graph, compile_flags, input_order)
elif output_format == "tosa":
# Emit TOSA flatbuffer
binary = bytes(tosa_graph.serialize())
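The encode/decode pair for the new "input_order" spec is easy to miss: build() space-joins the characters of the string passed to set_input_order(), while preprocess() splits on commas; this round-trips because int() tolerates surrounding whitespace. A self-contained sketch of that round-trip, with a hypothetical input value:

# Self-contained sketch of the "input_order" round-trip between
# ArmCompileSpecBuilder.build() and preprocess() in the diff above.
input_order = "1,0"  # as passed to set_input_order()

# build(): " ".join over the characters yields "1 , 0", then encode
encoded = " ".join(map(str, input_order)).encode()

# preprocess(): split on commas and parse ints ("1 " and " 0" still parse)
decoded = list(map(int, encoded.decode().split(",")))
print(decoded)  # [1, 0]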
15 changes: 9 additions & 6 deletions backends/arm/arm_vela.py
@@ -17,10 +17,13 @@

# Pack either input or output tensor block, compose the related arrays into
# per-io structs to simplify runtime use.
def vela_bin_pack_io(prefix, data):
ios = struct.pack("<i", len(data[prefix + "_shape"]))
for i in range(len(data[prefix + "_shape"])):
io_shape = data[prefix + "_shape"][i]
def vela_bin_pack_io(prefix, data, shape_order=None):
vela_input_shapes = data[prefix + "_shape"]

order = shape_order if shape_order else range(len(vela_input_shapes))
ios = struct.pack("<i", len(vela_input_shapes))
for i in order:
io_shape = vela_input_shapes[i]
io_elem_size = data[prefix + "_elem_size"][i]
io_offset = data[prefix + "_offset"][i]
io_region = data[prefix + "_region"][i]
@@ -36,7 +39,7 @@ def vela_bin_pack_io(prefix, data):
# Output via Vela to binary stream for ArmBackendEthosU
# WARNING: Do not change this without changing VelaBinStream.cpp as that
# function consumes this format and the two need to align.
def vela_compile(tosa_graph, args: List[str]):
def vela_compile(tosa_graph, args: List[str], shape_order=None):
with tempfile.TemporaryDirectory() as tmpdir:
tosaname = "out.tosa"
flatbuffer = tosa_graph.serialize()
@@ -78,7 +81,7 @@ def vela_compile(tosa_graph, args: List[str]):
bin_blocks["scratch_data"] = b"\x00" * block_length

# Capture inputs and outputs
bin_blocks["inputs"] = vela_bin_pack_io("input", data)
bin_blocks["inputs"] = vela_bin_pack_io("input", data, shape_order)
bin_blocks["outputs"] = vela_bin_pack_io("output", data)

bin_blocks["vela_end_stream"] = b""
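To see the effect of the new shape_order parameter in isolation, a self-contained sketch with hypothetical shapes (the real vela_bin_pack_io also packs elem_size, offset, and region per io):

# Hypothetical sketch of the shape_order logic in vela_bin_pack_io above.
import struct

def pack_shapes(shapes, shape_order=None):
    order = shape_order if shape_order else range(len(shapes))
    packed = struct.pack("<i", len(shapes))  # io count, little-endian int32
    for i in order:  # emit ios in the requested order, not 0..N-1
        packed += struct.pack(f"<{len(shapes[i])}i", *shapes[i])
    return packed

# Two inputs packed in swapped order:
print(pack_shapes([[1, 3, 224, 224], [1, 10]], shape_order=[1, 0]).hex())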
7 changes: 6 additions & 1 deletion backends/arm/operator_support/__init__.py
@@ -5,4 +5,9 @@

# pyre-unsafe

from . import mean_dim_support, tosa_supported_operators, var_correction_support # noqa
from . import ( # noqa
mean_dim_support,
right_shift_support,
tosa_supported_operators,
var_correction_support,
)
35 changes: 35 additions & 0 deletions backends/arm/operator_support/right_shift_support.py
@@ -0,0 +1,35 @@
# Copyright 2024 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import logging

import torch.fx as fx
from executorch.backends.arm.operator_support.tosa_supported_operators import (
register_tosa_support_check,
SupportedTOSAOperatorCheck,
)
from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
from executorch.exir.dialects._ops import ops as exir_ops

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)


@register_tosa_support_check
class RightShiftSupported(SupportedTOSAOperatorCheck):
targets = [exir_ops.edge.aten.__rshift__.Scalar]

tosa_specs = [
TosaSpecification.create_from_string("TOSA-0.80.0+BI"),
TosaSpecification.create_from_string("TOSA-0.80.0+MI"),
]

def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification):

# TODO MLETORCH-525 Remove warning
if isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset:
logging.warning(f"{node.target} may introduce one-off errors.")
return True
1 change: 1 addition & 0 deletions backends/arm/operators/__init__.py
@@ -27,6 +27,7 @@
op_reciprocal,
op_relu,
op_repeat,
op_rshift,
op_rsqrt,
op_select,
op_sigmoid,
[Remaining changed files of the 52 total are not shown.]
