diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 8e1d4b9e6..6d046e132 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -256,6 +256,7 @@ under the License.
           <topicref href="topics/impala_thread_reservation_limit.xml"/>
           <topicref href="topics/impala_timezone.xml"/>
           <topicref href="topics/impala_topn_bytes_limit.xml"/>
+          <topicref href="topics/impala_utf8_mode.xml"/>
         </topicref>
       </topicref>
       <topicref href="topics/impala_show.xml"/>
@@ -292,6 +293,7 @@ under the License.
     <topicref href="topics/impala_udf.xml"/>
     <topicref href="topics/impala_langref_unsupported.xml"/>
     <topicref href="topics/impala_porting.xml"/>
+    <topicref href="topics/impala_utf_8.xml"/>
   </topicref>
   
   <topicref href="topics/impala_performance.xml">
diff --git a/docs/topics/impala_string.xml b/docs/topics/impala_string.xml
index fa3c31601..587666508 100644
--- a/docs/topics/impala_string.xml
+++ b/docs/topics/impala_string.xml
@@ -147,9 +147,7 @@ under the License.
     </p>
 
     <ul>
-      <li>
-        String manipulation functions.
-      </li>
+      <li>CHAR/VARCHAR truncating/padding.</li>
 
       <li>
         Comparison operators.
@@ -171,6 +169,8 @@ under the License.
       those national language characteristics of string data, use logic on the
       application side.
     </p>
+    <p>If you only need Hive-compatible string function behavior on UTF-8 encoded strings, turn on
+      the query option UTF8_MODE. For details, see <xref href="impala_utf_8.xml"/>.</p>
     <p>
       <b>Conversions:</b>
     </p>
diff --git a/docs/topics/impala_utf8_mode.xml b/docs/topics/impala_utf8_mode.xml
new file mode 100644
index 000000000..80ab949c4
--- /dev/null
+++ b/docs/topics/impala_utf8_mode.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="utf8_mode">
+
+ <title>UTF8_MODE Query Option</title>
+ <titlealts audience="PDF"><navtitle>UTF8_MODE</navtitle></titlealts>
+ <prolog>
+  <metadata>
+   <data name="Category" value="Impala"/>
+   <data name="Category" value="Impala Query Options"/>
+   <data name="Category" value="Troubleshooting"/>
+   <data name="Category" value="Querying"/>
+   <data name="Category" value="Developers"/>
+   <data name="Category" value="Data Analysts"/>
+  </metadata>
+ </prolog>
+
+ <conbody>
+  <p>
+   <indexterm audience="hidden">UTF8_MODE Query Option</indexterm> UTF-8 support allows string
+   functions to recognize UTF-8 characters, so that strings are processed in a way that is
+   compatible with other engines.</p>
+  <p>You can use the new query option, UTF8_MODE, to turn the UTF-8 aware behavior on or off. The
+   query option can be set globally or at the per-session level. Only queries with UTF8_MODE=true
+   have UTF-8 aware behaviors. If the query option UTF8_MODE is turned on globally, existing queries
+   that depend on the original binary behavior need to be explicitly set to UTF8_MODE=false.</p>
+
+  <p><b>Type:</b> BOOLEAN</p>
+  <p><b>Default:</b> FALSE</p>
+  <p><b>Added in:</b> Impala 4.1</p>
+  <p conref="../shared/impala_common.xml#common/related_info"/>
+  <p>
+   <xref href="impala_string.xml"/>,
+   <xref href="impala_utf_8.xml"/>
+  </p>
+ </conbody>
+</concept>
\ No newline at end of file
diff --git a/docs/topics/impala_utf_8.xml b/docs/topics/impala_utf_8.xml
new file mode 100644
index 000000000..fac6bce88
--- /dev/null
+++ b/docs/topics/impala_utf_8.xml
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="utf_8">
+ <title>UTF-8 Support</title>
+ <prolog>
+  <metadata>
+   <data name="Category" value="Impala"/>
+   <data name="Category" value="Impala Functions"/>
+   <data name="Category" value="utf_8"/>
+   <data name="Category" value="Developers"/>
+   <data name="Category" value="Data Analysts"/>
+  </metadata>
+ </prolog>
+ <conbody>
+  <p>Impala has traditionally offered a single-byte binary character set for the STRING data type,
+   with character data encoded in the ASCII character set. Prior to this release, Impala was
+   incompatible with Hive in some functions applied to non-ASCII strings. For example, length() in
+   Impala used to return the length of the string in bytes, while length() in Hive returns the
+   number of UTF-8 characters in the string. UTF-8 characters (code points) are encoded as
+   variable-length byte sequences (1 to 4 bytes), so the results differ when the string contains
+   non-ASCII characters. This release provides a UTF-8 aware behavior for the Impala STRING type,
+   enabled by a query option, to get consistent behavior with Hive on UTF-8 strings.</p>
+  <p>UTF-8 support allows you to read and write UTF-8 from standard formats like Parquet and ORC,
+   thus improving interoperability with other engines that also support those standard formats.</p>
+ </conbody>
+ <concept id="turning_ON">
+  <title>Turning ON the UTF-8 behavior</title>
+  <conbody>
+   <p>You can use the new query option, UTF8_MODE, to turn the UTF-8 aware behavior on or off. The
+    query option can be set globally or at the per-session level. Only queries with UTF8_MODE=true
+    have UTF-8 aware behaviors.</p>
+   <p>
+    <note>If the query option UTF8_MODE is turned on globally, existing queries that depend on the
+     original binary behavior need to explicitly set UTF8_MODE=false.</note></p>
+  </conbody>
+ </concept>
+ <concept id="list_string_functions">
+  <title>List of STRING Functions</title>
+  <conbody>
+   <p>The new query option introduced will turn on the UTF-8 aware behavior of the following string
+    functions:</p>
+   <ul>
+    <li>LENGTH(STRING a)<ul id="ul_jgr_x1l_gtb">
+      <li>returns the number of UTF-8 characters instead of bytes</li>
+     </ul></li>
+    <li>SUBSTR(STRING a, INT start [, INT len])</li>
+    <li>SUBSTRING(STRING a, INT start [, INT len])<ul id="ul_tkh_x1l_gtb">
+      <li>the substring start position and length are counted in UTF-8 characters instead of
+       bytes</li>
+     </ul></li>
+    <li>REVERSE(STRING a)<ul id="ul_o1d_jbl_gtb">
+      <li>the unit of the operation is a UTF-8 character, i.e., it won't reverse bytes inside a
+       UTF-8 character.<p>
+        <note>The results of reverse("最快的SQL引擎") used to be "��敼�LQS��竿倜�" and now
+         "擎引LQS的快最".</note></p></li>
+     </ul></li>
+    <li>INSTR(STRING str, STRING substr[, BIGINT position[, BIGINT occurrence]])</li>
+    <li>LOCATE(STRING substr, STRING str[, INT pos])<ul id="ul_y1p_sbl_gtb">
+      <li>These functions have an optional position argument. The return values are also positions
+       in the string. In UTF-8 mode, these positions are counted by UTF-8 characters instead of
+       bytes.</li>
+     </ul></li>
+    <li>mask functions<ul id="ul_qmg_5bl_gtb">
+      <li>The unit of the operation is a UTF-8 character, i.e., they won't mask the string
+       byte-to-byte.</li>
+     </ul></li>
+    <li>upper/lower/initcap<ul id="ul_x3c_wbl_gtb">
+      <li>These functions will recognize non-ASCII characters and transform them based on the
+       current locale used by the Impala process.</li>
+     </ul></li>
+   </ul>
+  </conbody>
+ </concept>
+ <concept id="limitations">
+  <title>Limitations</title>
+  <conbody>
+   <ul id="ul_dhh_dcl_gtb">
+    <li>Use the UTF8_MODE option only when needed, since the performance of UTF-8 mode is not
+     optimized yet. It is only an experimental feature.</li>
+    <li>UTF-8 support for CHAR and VARCHAR types is not implemented yet. So VARCHAR(N) will still
+     return N bytes instead of N UTF-8 characters.</li>
+   </ul>
+  </conbody>
+ </concept>
+</concept>
\ No newline at end of file