diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst index 4bf6a80d65a..e6ee1dc8de1 100644 --- a/compiler/circle2circle-dredd-recipe-test/test.lst +++ b/compiler/circle2circle-dredd-recipe-test/test.lst @@ -48,6 +48,10 @@ Add(Net_DwConv_BN_000 PASS fuse_batchnorm_with_dwconv) Add(Net_DwConv_BN_001 PASS fuse_batchnorm_with_dwconv) Add(Net_FC_Gelu_FC_000 PASS replace_with_fc_gelu_fc) Add(Net_FullyConnected_Add_000 PASS fold_fully_connected) +Add(Net_FullyConnected_Mul_000 PASS fuse_mul_with_fullyconnected) +Add(Net_FullyConnected_Mul_001 PASS fuse_mul_with_fullyconnected) +Add(Net_FullyConnected_Mul_002 PASS fuse_mul_with_fullyconnected) +Add(Net_FullyConnected_Mul_003 PASS fuse_mul_with_fullyconnected) Add(Net_Gelu_000 PASS fuse_gelu) Add(Net_Gelu_001 PASS fuse_gelu) Add(Net_Horizontal_FullyConnected_Add_000 PASS fuse_horizontal_fc_layers) diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp index 757c368f31d..ccea8c65788 100644 --- a/compiler/circle2circle/src/Circle2Circle.cpp +++ b/compiler/circle2circle/src/Circle2Circle.cpp @@ -118,6 +118,8 @@ int entry(int argc, char **argv) "This will fuse Mul operation with a preceding Conv if possible."); add_switch(arser, "--fuse_mul_with_div", "This will fuse Mul operation with a Div operation whose numerator is const."); + add_switch(arser, "--fuse_mul_with_fullyconnected", + "This will fuse Mul operator with a preceding FullyConnected operator."); add_switch(arser, "--fuse_slice_with_tconv", "This will fuse Slice operation with a preceding TConv if possible."); add_switch(arser, "--fuse_transpose_with_mean", @@ -326,6 +328,8 @@ int entry(int argc, char **argv) options->enable(Algorithms::FuseMulWithConv); if (arser.get("--fuse_mul_with_div")) options->enable(Algorithms::FuseMulWithDiv); + if (arser.get("--fuse_mul_with_fullyconnected")) + options->enable(Algorithms::FuseMulWithFullyConnected); if 
(arser.get("--make_batchnorm_gamma_positive")) options->enable(Algorithms::MakeBatchNormGammaPositive); if (arser.get("--fuse_preactivation_batchnorm")) diff --git a/compiler/luci-pass-value-py-test/test.lst b/compiler/luci-pass-value-py-test/test.lst index e2c37517486..d610f980bf2 100644 --- a/compiler/luci-pass-value-py-test/test.lst +++ b/compiler/luci-pass-value-py-test/test.lst @@ -33,6 +33,10 @@ eval(Net_Dequantize_Add_000 fold_dequantize) eval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv) eval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv) eval(Net_FullyConnected_Add_000 fold_fully_connected) +eval(Net_FullyConnected_Mul_000 fuse_mul_with_fullyconnected) +eval(Net_FullyConnected_Mul_001 fuse_mul_with_fullyconnected) +eval(Net_FullyConnected_Mul_002 fuse_mul_with_fullyconnected) +eval(Net_FullyConnected_Mul_003 fuse_mul_with_fullyconnected) eval(Net_Horizontal_FullyConnected_Add_000 fuse_horizontal_fc_layers) eval(Net_InstanceNorm_001 fuse_instnorm) eval(Net_InstanceNorm_002 fuse_instnorm) diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index 9cbd26f0da5..8a1eb6d4f78 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -49,6 +49,7 @@ class CircleOptimizer final FuseMeanWithMean, FuseMulWithConv, FuseMulWithDiv, + FuseMulWithFullyConnected, FuseTransposeWithMean, ResolveCustomOpAdd, ResolveCustomOpBatchMatMul, diff --git a/compiler/luci/pass/include/luci/Pass/FuseMulWithFullyConnectedPass.h b/compiler/luci/pass/include/luci/Pass/FuseMulWithFullyConnectedPass.h new file mode 100644 index 00000000000..718039f1c69 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/FuseMulWithFullyConnectedPass.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_FUSE_MUL_WITH_FULLYCONNECTED_PASS_H__ +#define __LUCI_FUSE_MUL_WITH_FULLYCONNECTED_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to fuse Mul into CircleFullyConnected + */ +struct FuseMulWithFullyConnectedPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::FuseMulWithFullyConnectedPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_FUSE_MUL_WITH_FULLYCONNECTED_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 840c8dd25dd..e4bf84eeef9 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -48,6 +48,7 @@ #include "luci/Pass/FuseMeanWithMeanPass.h" #include "luci/Pass/FuseMulWithConvPass.h" #include "luci/Pass/FuseMulWithDivPass.h" +#include "luci/Pass/FuseMulWithFullyConnectedPass.h" #include "luci/Pass/FusePreActivationBatchNormPass.h" #include "luci/Pass/FusePReluPass.h" #include "luci/Pass/FuseGeluPass.h" @@ -278,6 +279,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const phase.emplace_back(std::make_unique()); phase.emplace_back(std::make_unique()); + if (_options->query(Options::Algorithm::FuseMulWithFullyConnected)) + { + phase.emplace_back(std::make_unique()); + } if (_options->query(Options::Algorithm::CommonSubExpressionElimination)) { 
phase.emplace_back(std::make_unique()); @@ -310,6 +315,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } + if (_options->query(Options::Algorithm::FuseMulWithFullyConnected)) + { + phase.emplace_back(std::make_unique()); + } if (_options->query(Options::Algorithm::ResolveCustomOpMaxPoolWithArgmax)) { phase.emplace_back(std::make_unique()); diff --git a/compiler/luci/pass/src/FuseMulWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseMulWithFullyConnectedPass.cpp new file mode 100644 index 00000000000..d4fb75953ed --- /dev/null +++ b/compiler/luci/pass/src/FuseMulWithFullyConnectedPass.cpp @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FuseMulWithFullyConnectedPass.h" + +#include "helpers/NodeFiller.h" + +#include +#include +#include + +namespace +{ + +#define RETURN_FALSE_UNLESS(cond) \ + if (not(cond)) \ + return false; + +inline bool is_single_element(const luci::CircleConst *node) +{ + return ((node->rank() == 1 || node->rank() == 0) && node->size() == 1); +} + +inline void update_with_single_element(luci::CircleConst *fused_node, + const luci::CircleConst *multiplication) +{ + for (uint32_t i = 0; i < fused_node->size(); i++) + { + fused_node->at(i) *= multiplication->at(0); + } +} + +luci::CircleConst *gen_fused_weights(luci::CircleConst *weights, + const luci::CircleConst *multiplication) +{ + auto fused_weights = luci::clone(weights); + // Single element multiplication: + if (is_single_element(multiplication)) + { + update_with_single_element(fused_weights, multiplication); + } + // N-size multiplication: + else + { + // Go along channels, multiplication size is ensured to be compatible with channels. + auto count = fused_weights->dim(0).value(); + auto size = fused_weights->dim(fused_weights->rank() - 1).value(); + float val; + for (uint32_t c = 0; c < count; c++) + { + val = multiplication->at(c); + for (uint32_t i = 0; i < size; i++) + { + fused_weights->at(c * size + i) *= val; + } + } + } + return fused_weights; +} + +luci::CircleConst *gen_fused_bias(luci::CircleConst *bias, const luci::CircleConst *multiplication) +{ + auto fused_bias = luci::clone(bias); + // Single element multiplication: + if (is_single_element(multiplication)) + { + update_with_single_element(fused_bias, multiplication); + } + // N-size multiplication: + else + { + // Go along channels, multiplication size is ensured to be compatible with channels. 
+ for (uint32_t i = 0; i < fused_bias->size(); i++) + { + fused_bias->at(i) *= multiplication->at(i); + } + } + return fused_bias; +} + +/** + * Fuse Mul to FullyConnected if the multiplied value is a channel(last dimension)-wise constant + * + * BEFORE + * | + * [CircleFullyConnected] + * | + * [CircleMul] + * | + * + * AFTER + * | + * [CircleFullyConnected] [CircleMul] (dead) + * | + * + */ +bool fuse_mul_with_fc(luci::CircleMul *mul) +{ + // Sanity check: + RETURN_FALSE_UNLESS(mul); + // Allow Mul node only with FLOAT32 data type: + RETURN_FALSE_UNLESS(mul->dtype() == loco::DataType::FLOAT32); + // Check if any FC node connects to Mul. + // Find the pattern of Mul(FC, CircleConst): + luci::CircleFullyConnected *fc = nullptr; + luci::CircleConst *multiplication = nullptr; + RETURN_FALSE_UNLESS(luci::fill(&fc, &multiplication).with_commutative_args_of(mul)); + /** + * Make sure that FullyConnected has only one successor. + * + * If the FullyConnected output is connected to more nodes, + * this pass will replace node with new fused FullyConnected. + * Thus pass success will only introduce extra FullyConnected + * without reducing overall number of nodes. + * Which tends to increase model's size and degrades model's performance. + * Thus one successor is required to benefit from this pass. + * + * Example graph that illustrates the described scenario: + * + * BEFORE + * | + * [CircleFullyConnected] + * | + * +-------+----------------+ + * | | + * | | + * [Other Node] [CircleMul] + * | | + * + * AFTER + * | + * [CircleFullyConnected] + * | + * +-------+-----------------------+ + * | | + * | | + * [Other Node] [New CircleFullyConnected Fused with Mul] + * | | + * + */ + RETURN_FALSE_UNLESS(loco::succs(fc).size() == 1); + // Allow only FLOAT32 data type: + RETURN_FALSE_UNLESS(fc->dtype() == loco::DataType::FLOAT32); + // Allow only without activation functions as values are going to + // be multiplied before activation function. 
+ RETURN_FALSE_UNLESS(fc->fusedActivationFunction() == luci::FusedActFunc::NONE); + // Check for weights being Constant: + auto weights = dynamic_cast(fc->weights()); + RETURN_FALSE_UNLESS(weights); + // Get rank of multiplication: + auto rank = multiplication->rank(); + // Check that all dimensions are ones, checks broadcast capabilities. + // Last dimension of multiplication must be compatible with FC. + // N-D case (N>1): + if (multiplication->rank() > 1) + { + // Check channel-wise broadcasting: + for (uint32_t i = 0; i < rank - 1; i++) + RETURN_FALSE_UNLESS(multiplication->dim(i).value() == 1); + // Check the last dimension of Mul is the same with the first dimension of FullyConnected + RETURN_FALSE_UNLESS(multiplication->dim(rank - 1) == weights->dim(0)); + } + // 1-D or scalar case: + else if (multiplication->rank() == 1) + { + RETURN_FALSE_UNLESS(multiplication->size() == 1 || + multiplication->size() == weights->dim(0)); + } + else if (multiplication->rank() == 0) + { + RETURN_FALSE_UNLESS(multiplication->size() == 1); + } + + // Only supports: + // (1) constant bias + // (2) no bias + auto bias = loco::must_cast(fc->bias()); + if (bias->opcode() == luci::CircleOpcode::CIRCLECONST) + { + // Create new bias to be updated with values: + auto const_bias = dynamic_cast(fc->bias()); + RETURN_FALSE_UNLESS(const_bias) + RETURN_FALSE_UNLESS(const_bias->dtype() == loco::DataType::FLOAT32); + // Create new bias with updated values and replace: + auto fused_bias = gen_fused_bias(const_bias, multiplication); + fc->bias(fused_bias); + } + else if (bias->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE) + { + return false; + } + + // Create new weights with updated values and replace: + auto fused_weights = gen_fused_weights(weights, multiplication); + fc->weights(fused_weights); + + // Set origin and copy Activation Function if existing: + fc->fusedActivationFunction(mul->fusedActivationFunction()); + luci::add_origin(fc, luci::get_origin(mul)); + +
replace(mul).with(fc); + + return true; +} + +} // namespace + +namespace luci +{ + +bool FuseMulWithFullyConnectedPass::run(loco::Graph *g) +{ + bool changed = false; + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + if (auto mul = dynamic_cast(node)) + { + if (fuse_mul_with_fc(mul)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/FuseMulWithFullyConnectedPass.test.cpp b/compiler/luci/pass/src/FuseMulWithFullyConnectedPass.test.cpp new file mode 100644 index 00000000000..a4f9d6bf087 --- /dev/null +++ b/compiler/luci/pass/src/FuseMulWithFullyConnectedPass.test.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "luci/Pass/FuseMulWithFullyConnectedPass.h" +#include "helpers/CreateCircleConst.h" + +#include +#include + +#include + +#define DIM_ONE 8 +#define DIM_TWO 4 +#define MUL_VAL 2.0f + +namespace +{ + +using namespace luci::test; + +/** + * Graph for this test + * + * BEFORE (without extra_fc_successor) + * + * [FC] + * | + * [Mul w/ Relu] + * + * BEFORE (with extra_fc_successor) + * + * [FC] + * | + * |------------------- + * | | + * | | + * [Mul w/ Relu] [other FC] + * + * AFTER (if pass applied) + * + * [FC w/ Relu] (weights and bias updated) + * + */ +class FCMulGraphlet +{ +public: + void init(loco::Graph *g, luci::FusedActFunc fc_activation, bool is_mul_scalar, bool use_bias, + bool extra_successor) + { + _fc = g->nodes()->create(); + + std::vector weights_val(DIM_ONE * DIM_TWO); + for (uint32_t i = 0; i < DIM_ONE * DIM_TWO; i++) + weights_val.at(i) = i; + + _fc_f = luci::create_const_node(g, loco::DataType::FLOAT32, {DIM_ONE, DIM_TWO}, weights_val); + _fc->weights(_fc_f); + + if (use_bias) + { + std::vector bias_val(DIM_ONE); + for (uint32_t i = 0; i < DIM_ONE; i++) + bias_val.at(i) = i; + + _fc_b = luci::create_const_node(g, loco::DataType::FLOAT32, {DIM_ONE}, bias_val); + } + else + { + // Create CircleOutputExclude -- no bias + _fc_b = g->nodes()->create(); + } + _fc->bias(_fc_b); + + _fc->fusedActivationFunction(fc_activation); + _fc->dtype(loco::DataType::FLOAT32); + _fc->shape({1, DIM_ONE}); + _fc->name("fc"); + + if (extra_successor) + { + _extra_succ = g->nodes()->create(); + // Set previous FC as input to bump number of successors for it: + _extra_succ->input(_fc); + std::vector weights_val(DIM_ONE * DIM_TWO); + _extra_f = + luci::create_const_node(g, loco::DataType::FLOAT32, {DIM_ONE, DIM_TWO}, weights_val); + _extra_succ->weights(_extra_f); + _extra_succ->bias(nullptr); + _extra_succ->fusedActivationFunction(luci::FusedActFunc::NONE); + _extra_succ->dtype(loco::DataType::FLOAT32); + _extra_succ->shape({1, DIM_ONE}); + 
_extra_succ->name("extra_fc"); + } + + std::vector mul_values; + + if (is_mul_scalar) + { + mul_values.push_back(static_cast(MUL_VAL)); + _mul_c = luci::create_const_node(g, loco::DataType::FLOAT32, {}, mul_values); + } + else + { + for (uint32_t i = 0; i < DIM_ONE; i++) + { + mul_values.push_back(static_cast(i)); + } + _mul_c = luci::create_const_node(g, loco::DataType::FLOAT32, {1, 1, 1, DIM_ONE}, mul_values); + } + + _mul = g->nodes()->create(); + _mul->x(_fc); + _mul->y(_mul_c); + _mul->fusedActivationFunction(luci::FusedActFunc::RELU); + _mul->dtype(loco::DataType::FLOAT32); + if (is_mul_scalar) + { + _mul->shape({1, DIM_ONE}); + } + else + { + _mul->shape({1, 1, 1, DIM_ONE}); + } + _mul->name("mul"); + } + +public: + luci::CircleFullyConnected *fc() { return _fc; } + + void to_fm_bias(void) + { + assert(_fc != nullptr); + + auto new_fc = _fc->graph()->nodes()->create(); + _fc->bias(new_fc); + } + +protected: + luci::CircleFullyConnected *_fc = nullptr; + luci::CircleMul *_mul = nullptr; + luci::CircleConst *_fc_f = nullptr; + luci::CircleNode *_fc_b = nullptr; + luci::CircleConst *_mul_c = nullptr; + luci::CircleFullyConnected *_extra_succ = nullptr; + luci::CircleConst *_extra_f = nullptr; +}; + +class FuseMulWithFCTestGraph : public TestIOGraph, public FCMulGraphlet +{ +public: + void init(luci::FusedActFunc fc_activation, bool is_mul_scalar, bool use_bias, + bool extra_successor) + { + TestIOGraph::init({1, DIM_TWO}, {1, DIM_ONE}); + FCMulGraphlet::init(g(), fc_activation, is_mul_scalar, use_bias, extra_successor); + + _fc->input(input()); + + output()->from(_mul); + } +}; + +class FuseMulWithFullyConnectedPassTest : public ::testing::Test +{ +public: + FuseMulWithFCTestGraph g; + luci::FuseMulWithFullyConnectedPass pass; +}; + +} // namespace + +TEST_F(FuseMulWithFullyConnectedPassTest, fc_mul_tensor) +{ + g.init(luci::FusedActFunc::NONE, false /* is_mul_scalar */, true /* use_bias */, + false /* extra_successor */); + + EXPECT_EQ(true, pass.run(g.g())); 
+ + auto fc = dynamic_cast(g.output()->from()); + EXPECT_NE(nullptr, fc); + + auto weights = loco::must_cast(g.fc()->weights()); + auto weights_n = weights->dim(0).value(); + auto weights_m = weights->dim(1).value(); + uint32_t offset = 0; + for (uint32_t i = 0; i < weights_n; i++) + { + for (uint32_t j = 0; j < weights_m; j++) + { + offset = i * weights_m + j; + EXPECT_EQ(i * offset, weights->at(offset)); + } + } + + auto bias = loco::must_cast(g.fc()->bias()); + for (uint32_t i = 0; i < bias->size(); i++) + { + EXPECT_EQ(i * i, bias->at(i)); + } +} + +TEST_F(FuseMulWithFullyConnectedPassTest, fc_mul_scalar) +{ + g.init(luci::FusedActFunc::NONE, true /* is_mul_scalar */, true /* use_bias */, + false /* extra_successor */); + + EXPECT_EQ(true, pass.run(g.g())); + + auto fc = dynamic_cast(g.output()->from()); + EXPECT_NE(nullptr, fc); + + auto weights = loco::must_cast(g.fc()->weights()); + auto weights_n = weights->dim(0).value(); + auto weights_m = weights->dim(1).value(); + uint32_t offset = 0; + for (uint32_t i = 0; i < weights_n; i++) + { + for (uint32_t j = 0; j < weights_m; j++) + { + offset = i * weights_m + j; + EXPECT_EQ(MUL_VAL * offset, weights->at(offset)); + } + } + + auto bias = loco::must_cast(g.fc()->bias()); + for (uint32_t i = 0; i < bias->size(); i++) + { + EXPECT_EQ(MUL_VAL * i, bias->at(i)); + } +} + +TEST_F(FuseMulWithFullyConnectedPassTest, fc_no_bias) +{ + g.init(luci::FusedActFunc::NONE, false /* is_mul_scalar */, false /* use_bias */, + false /* extra_successor */); + + EXPECT_EQ(true, pass.run(g.g())); + + auto fc = dynamic_cast(g.output()->from()); + EXPECT_NE(nullptr, fc); + auto no_bias = dynamic_cast(fc->bias()); + ASSERT_NE(nullptr, no_bias); + + auto weights = loco::must_cast(g.fc()->weights()); + auto weights_n = weights->dim(0).value(); + auto weights_m = weights->dim(1).value(); + uint32_t offset = 0; + for (uint32_t i = 0; i < weights_n; i++) + { + for (uint32_t j = 0; j < weights_m; j++) + { + offset = i * weights_m + j; + 
EXPECT_EQ(i * offset, weights->at(offset)); + } + } +} + +TEST_F(FuseMulWithFullyConnectedPassTest, bias_feature_map_NEG) +{ + g.init(luci::FusedActFunc::NONE, false /* is_mul_scalar */, true /* use_bias */, + false /* extra_successor */); + + // Bias cannot be fused as it's passed as feature map. + g.to_fm_bias(); + + EXPECT_EQ(false, pass.run(g.g())); +} + +TEST_F(FuseMulWithFullyConnectedPassTest, fc_with_activation_NEG) +{ + g.init(luci::FusedActFunc::RELU, false /* is_mul_scalar */, true /* use_bias */, + false /* extra_successor */); + + EXPECT_EQ(false, pass.run(g.g())); +} + +TEST_F(FuseMulWithFullyConnectedPassTest, fc_with_null_weights_NEG) +{ + g.init(luci::FusedActFunc::NONE, false /* is_mul_scalar */, true /* use_bias */, + false /* extra_successor */); + + g.fc()->weights(nullptr); + + EXPECT_EQ(false, pass.run(g.g())); +} + +TEST_F(FuseMulWithFullyConnectedPassTest, fc_with_extra_successor_NEG) +{ + g.init(luci::FusedActFunc::NONE, false /* is_mul_scalar */, true /* use_bias */, + true /* extra_successor */); + + EXPECT_EQ(false, pass.run(g.g())); +} diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt index fefbabf9a17..d6656545ff8 100644 --- a/compiler/one-cmds/how-to-use-one-commands.txt +++ b/compiler/one-cmds/how-to-use-one-commands.txt @@ -174,6 +174,7 @@ Current transformation options are - fuse_mul_to_fullyconnected_weights : This fuses Mul operator to following FullyConnected operator weights - fuse_mul_with_conv: This fuses Mul with a preceding Convolution op if possible. - fuse_mul_with_div: This fuses Mul and Div op as Div. +- fuse_mul_with_fullyconnected: This fuses Mul operator with the preceding FullyConnected operator if possible. - fuse_slice_with_tconv: This fuses Slice with a preceding TConv if possible. 
- fuse_bcq: This enables Binary-Coded-bases Quantized DNNs - read https://arxiv.org/abs/2005.09904 for detailed information diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py index 8c5de1b646d..a8dabf139d0 100644 --- a/compiler/one-cmds/onelib/constant.py +++ b/compiler/one-cmds/onelib/constant.py @@ -52,6 +52,7 @@ class CONSTANT: 'fuse_mean_with_mean', 'fuse_mul_with_conv', 'fuse_mul_with_div', + 'fuse_mul_with_fullyconnected', 'fuse_transpose_with_mean', 'fuse_slice_with_tconv', 'fuse_horizontal_fc_layers', @@ -131,6 +132,7 @@ class CONSTANT: ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'), ('fuse_mul_with_conv', 'fuse Mul op to Convolution op'), ('fuse_mul_with_div', 'fuse Mul with Div as Div'), + ('fuse_mul_with_fullyconnected', 'fuse Mul op to FullyConnected op'), ('fuse_transpose_with_mean', 'fuse Mean with a preceding Transpose under certain conditions'), ('fuse_horizontal_fc_layers', diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_000/test.recipe b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_000/test.recipe new file mode 100644 index 00000000000..84203a12d04 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_000/test.recipe @@ -0,0 +1,67 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 1 dim: 6 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 6 dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_bias" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "B" + type: FLOAT32 + shape { dim: 1, dim: 1, dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_out" + type: FLOAT32 + shape: { dim: 1 dim: 1 dim: 6 } +} +operand { + name: "mul_out" + type: FLOAT32 + shape: { dim: 1 dim: 1 dim: 6 } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + keep_num_dims: true + } + 
input: "ifm" + input: "fc_wgt" + input: "fc_bias" + output: "fc_out" +} +operation { + type: "Mul" + mul_options { + activation: NONE + } + input: "fc_out" + input: "B" + output: "mul_out" +} +input: "ifm" +output: "mul_out" diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_000/test.rule b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_000/test.rule new file mode 100644 index 00000000000..c1f2a827884 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_000/test.rule @@ -0,0 +1,12 @@ +# This checks if: +# Mul(FC(input, weights, bias), other) +# is converted to: +# FC(input, Mul(weights, other), Mul(bias, other)) +# and then Mul is fused to: +# FC(input, weights', bias') +# Here Mul is in shape of (1, 1, X). + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_001/test.recipe b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_001/test.recipe new file mode 100644 index 00000000000..d446424c238 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_001/test.recipe @@ -0,0 +1,67 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 6 dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_bias" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "B" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_out" + type: FLOAT32 + shape: { dim: 3 dim: 1 dim: 6 } +} +operand { + name: "mul_out" + type: FLOAT32 + shape: { dim: 3 dim: 1 dim: 6 } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + keep_num_dims: true + } + input: "ifm" + input: "fc_wgt" + input: "fc_bias" + output: "fc_out" +} 
+operation { + type: "Mul" + mul_options { + activation: RELU + } + input: "fc_out" + input: "B" + output: "mul_out" +} +input: "ifm" +output: "mul_out" diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_001/test.rule b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_001/test.rule new file mode 100644 index 00000000000..acdd2d6a96b --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_001/test.rule @@ -0,0 +1,12 @@ +# This checks if: +# Mul(FC(input, weights, bias), other) +# is converted to: +# FC(input, Mul(weights, other), Mul(bias, other)) +# and then Mul is fused to: +# FC(input, weights', bias') +# Here Mul is in shape of (X). + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_002/test.recipe b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_002/test.recipe new file mode 100644 index 00000000000..34e3cde4839 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_002/test.recipe @@ -0,0 +1,66 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 1 dim: 16 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 4 dim: 16 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_bias" + type: FLOAT32 + shape { dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "B" + type: FLOAT32 + shape { dim: 1 } + filler { + tag: "constant" + arg: "2.0" + } +} +operand { + name: "fc_out" + type: FLOAT32 + shape: { dim: 1 dim: 4 } +} +operand { + name: "mul_out" + type: FLOAT32 + shape: { dim: 1 dim: 4 } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + keep_num_dims: true + } + input: "ifm" + input: "fc_wgt" + input: "fc_bias" + output: "fc_out" +} +operation { + type: "Mul" + mul_options { + activation: NONE + } + input: "fc_out" + input: "B" + output: 
"mul_out" +} +input: "ifm" +output: "mul_out" diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_002/test.rule b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_002/test.rule new file mode 100644 index 00000000000..9cc8d5fd0a7 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_002/test.rule @@ -0,0 +1,12 @@ +# This checks if: +# Mul(FC(input, weights, bias), other) +# is converted to: +# FC(input, Mul(weights, other), Mul(bias, other)) +# and then Mul is fused to: +# FC(input, weights', bias') +# Here Mul is in shape of (1), it's a scalar. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1 diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_003/test.recipe b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_003/test.recipe new file mode 100644 index 00000000000..2883ebabdf0 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_003/test.recipe @@ -0,0 +1,57 @@ +operand { + name: "ifm" + type: FLOAT32 + shape { dim: 3 dim: 1 dim: 4 } +} +operand { + name: "fc_wgt" + type: FLOAT32 + shape { dim: 6 dim: 4 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "scale" + type: FLOAT32 + shape { dim: 6 } + filler { + tag: "gaussian" + arg: "0.0" + arg: "1.0" + } +} +operand { + name: "fc_out" + type: FLOAT32 + shape: { dim: 3 dim: 1 dim: 6 } +} +operand { + name: "mul_out" + type: FLOAT32 + shape: { dim: 3 dim: 1 dim: 6 } +} +operation { + type: "FullyConnected" + fullyconnected_options { + activation: NONE + keep_num_dims: true + } + input: "ifm" + input: "fc_wgt" + input: "" + output: "fc_out" +} +operation { + type: "Mul" + mul_options { + activation: RELU + } + input: "fc_out" + input: "scale" + output: "mul_out" +} +input: "ifm" +output: "mul_out" diff --git a/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_003/test.rule b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_003/test.rule new 
file mode 100644 index 00000000000..16bb2ff2788 --- /dev/null +++ b/res/TensorFlowLiteRecipes/Net_FullyConnected_Mul_003/test.rule @@ -0,0 +1,13 @@ +# This checks if: +# Mul(FC(input, weights, _), other) +# is converted to: +# FC(input, Mul(weights, other), _) +# and then Mul is fused to: +# FC(input, weights', _) +# Here the bias is empty/excluded "_". +# Thus Mul is only fused with weights. + +RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1 + +RULE "NO_MUL" $(op_count MUL) '=' 0 +RULE "FC_EXIST" $(op_count FULLY_CONNECTED) '=' 1