From 3b162e4fbee7e5832a0600e8c9373d6fcc0e0f27 Mon Sep 17 00:00:00 2001 From: Petr Zemek Date: Sat, 3 May 2014 08:47:44 +0200 Subject: [PATCH] Put the script sources from vim.org/scripts to GitHub + cleanup. --- CHANGELOG | 128 ++++++++++++ CONTRIBUTORS | 3 + LICENSE | 280 +++++++++++++++++++++++++ README.md | 197 ++++++++++++++++++ plugin/AutoFenc.vim | 492 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1100 insertions(+) create mode 100644 CHANGELOG create mode 100644 CONTRIBUTORS create mode 100644 LICENSE create mode 100644 README.md create mode 100644 plugin/AutoFenc.vim diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..c7f9d29 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,128 @@ +1.5.1 (2014-05-03) + +- Cleanup. +- Put the sources to GitHub (https://github.com/s3rvac/AutoFenc). + +1.5 (2012-03-17) + +Thanks to Ingo Karkat for the updates in this version. +- Supported HTML/XML/CSS file types have been made configurable and added more + defaults. +- Do not emit the "unrecognized charset" message when the encoding is known. + +1.4 (2012-03-11) + +Thanks to Ingo Karkat for the updates in this version. +- Improved the detection regexp for comments: + - added "fileencoding" and "charset"; + - demands that there is a whitespace in front of the keyword, so that + "daycoding" doesn't match; + - g:autofenc_autodetect_commentexpr allows to configure the pattern + for comment detection. +- Introduced g:autofenc_enc_blacklist to disable some encodings. For + example, the enca tool has a tendency to detect plain text files as UTF-7. + With the blacklist, AutoFenc can be instructed to ignore those encodings. +- The check for ASCII is set to be case-insensitive because enca reports + this in uppercase, so the condition fails unless ignorecase is set. +- Keeps changed CWD with 'autochdir' setting by temporarily disabling it. For + example, suppose that a user has ":lcd .." 
in after/ftplugin/gitcommit.vim + and that he is in the Git root directory, not the .git subdir when composing + a commit message. The reload of the buffer by AutoFenc (via :edit) again + triggered the automatic change of the working dir, and therefore the + customization was lost. The 'autochdir' setting needs to be temporarily + disabled to avoid that. +- Added a support for plain Vim 7.0 in the shellescape() emulation from + version 1.3.4. Otherwise, there were errors in Vim 7.0. + +1.3.4 (2012-02-27) + +- Don't override when the user explicitly sets file encoding with ++enc (thanks + to Benjamin Fritz). +- Fixed TOhtml version detection (again) and made sure line continuations can + actually be used (thanks to Benjamin Fritz and Ingo Karkat). +- Disabled the option shellslash on Windows before calling shellescape() (it + may cause problems on Windows, thanks for the tip goes to Benjamin Fritz). + +1.3.3 (2011-11-29) + +Thanks to Ingo Karkat for the updates in this version. +- Fixed a problem in the TOhtml detection when, for example, + g:loaded_2html_plugin = 'vim7.3_v6'. +- The return code of the call of an external program via system(ext_prog_cmd) + is now checked. This prevents Vim interpreting an error message as an + encoding. +- shellescape() is now used instead of quoting file_path manually. + +1.3.2 (2011-11-24) + +Thanks to Benjamin Fritz for the updates in this version. +- Fixed the detection of the version of the TOhtml plugin. + +1.3.1 (2011-07-23) + +Thanks to Benjamin Fritz for the updates in this version. +- Fixed the plugin behavior when reloading a file with different settings. + +1.3 (2011-04-22) +Thanks to Benjamin Fritz for the updates in this version. +- Added support for HTML version 5 encoding detection. +- The script now dies gracefully in old Vims. 
+- 'g:autofenc_autodetect_comment_num_of_lines' renamed to + 'g:autofenc_autodetect_num_of_lines' + +1.2.1 (2011-04-13) + +- Fixed a typo in a variable name (this resulted in an error in some + occasions). Thanks to Charles Lee for pointing this bug out. + +1.2 (2011-03-31) + +Thanks to Benjamin Fritz for the updates in this version. +- TOhtml's IANA name/Vim encoding conversion functions are now used. +- Changed BOM detection so it does not duplicate a check Vim already did by + default (i.e. default to off if ucs-bom is in the 'fileencodings'). +- Put autocmds in the AutoFenc augroup for easier handling. +- Made autocmd nested so we don't need to worry about restoring everything that + other autocmds may set (e.g. syntax). +- Jumplist or cursor position during detection are not affected. +- The g:autofenc_autodetect_num_of_lines option is now used also in + HTML/XML/CSS detection routines (previously only used for encoding specified + in comments). +- Improved HTML charset line regex. +- Added an option (g:autofenc_emit_messages) to emit messages about the + detected/used encoding upon opening a file. + +1.1.1 (2009-10-03) + +- Fixed the comment encoding detection function (the encoding was not detected + if there were some alphanumeric characters before the "encoding" string, like + in "# vim:fileencoding="). + +1.1 (2009-08-16) + +- Added three configuration possibilities to disable autodetection for specific + files (based on file size, file type and file path). See script description + for more info. + +1.0.2 (2009-08-11) + +- Fixed the XML encoding detection function. +- Minor code and documentation fixes. + +1.0.1 (2009-08-02) + +- Encoding autodetection is now performed only if the opened file exists (is + stored somewhere). So, for example, the autodetection is now not performed + when a new file is opened. +- Correctly works with .viminfo, where the last cursor position in the file is + stored when exiting the file.
In the previous version of this script, this + information was sometimes ignored and the cursor was initially on the very + last line in a file. If the user does not use this .viminfo feature (or he + does not use .viminfo at all), then the cursor will be initially placed on + the very first line. +- Fixed the implementation of the function which sets the detected + encoding. + +1.0 (2009-07-26) + +- Initial release version of this script. diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 index 0000000..bdec6c1 --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,3 @@ +Benjamin Fritz +Ingo Karkat +Petr Zemek diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d8cf7d4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,280 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS diff --git a/README.md b/README.md new file mode 100644 index 0000000..cafb94d --- /dev/null +++ b/README.md @@ -0,0 +1,197 @@ +AutoFenc +======== + +A Vim plugin that tries to automatically detect and set file encoding when +opening a file. + +Description +----------- + +This Vim plugin tries to automatically detect and set file encoding when +opening a file. It does this in several possible ways, depending on the +configuration. + +The following methods are implemented. When a method fails, the plugin tries +the next one. + +1. detection of BOM (byte-order-mark) at the beginning of the file, only for + some multibyte encodings +2. HTML way of encoding detection (via the `<meta>` tag), only for HTML-based + file types +3. XML way of encoding detection (via the `<?xml ... ?>` declaration), only for + XML-based file types +4.
CSS way of encoding detection (via `@charset 'at-rule'`), only for CSS files +5. checks whether the encoding is specified in a comment (like `# Encoding: + latin2`), for all file types +6. tries to detect the encoding via the specified external program (the default + one is [enca](https://github.com/nijel/enca)), for all file types + +If the autodetection fails, it is up to Vim and your configuration to set the +encoding. + +Installation +------------ + +The recommended way of installing this plugin is by using +[Pathogen](https://github.com/tpope/vim-pathogen). + +If you cannot use Pathogen or you want to install this plugin manually, just +put the `plugin/AutoFenc.vim` file into your `$HOME/.vim/plugin` directory +(Linux-like systems) or `%UserProfile%\vimfiles\plugin` folder (Windows +systems). + +Configuration Options +--------------------- + +The plugin has the following configuration options. You can set them in your +`$HOME/.vimrc` file. + +- `g:autofenc_enable` (0 or 1, default 1) + + Enables/disables this plugin. + +- `g:autofenc_emit_messages` (0 or 1, default 0) + + Emits messages about the detected/used encoding upon opening a file. + +- `g:autofenc_max_file_size` (number >= 0, default 10485760) + + If the size of a file is higher than this value (in bytes), then the + autodetection will not be performed. + +- `g:autofenc_disable_for_files_matching` (regular expression, see the settings + in `plugin/AutoFenc.vim`) + + If the file (with complete path) matches this regular expression, then the + autodetection will not be performed. It is by default set to disable + autodetection for non-local files (e.g. accessed via ftp or scp) because the + script cannot handle some kind of autodetection for these files. The regular + expression is matched case-sensitively. + +- `g:autofenc_disable_for_file_types` (list of strings, default `[]`) + + If the file type matches some of the filetypes specified in this list, then + the autodetection will not be performed. 
The comparison is done + case-sensitively. + +- `g:autofenc_autodetect_bom` (0 or 1, default 0 if `ucs-bom` is in + `fileencodings`, 1 otherwise) + + Enables/disables detection of encoding by BOM. + +- `g:autofenc_autodetect_html` (0 or 1, default 1) + + Enables/disables detection of encoding for HTML-based documents. + +- `g:autofenc_autodetect_html_filetypes` (regular expression, see the settings + in `plugin/AutoFenc.vim`) + + Regular expression for all supported HTML file types. + +- `g:autofenc_autodetect_xml` (0 or 1, default 1) + + Enables/disables detection of encoding for XML-based documents. + +- `g:autofenc_autodetect_xml_filetypes` (regular expression, see the settings + in `plugin/AutoFenc.vim`) + + Regular expression for all supported XML file types. + +- `g:autofenc_autodetect_css` (0 or 1, default 1) + + Enables/disables detection of encoding for CSS documents. + +- `g:autofenc_autodetect_css_filetypes` (regular expression, see the settings + in `plugin/AutoFenc.vim`) + + Regular expression for all supported CSS file types. + +- `g:autofenc_autodetect_comment` (0 or 1, default 1) + + Enables/disables detection of encoding in comments. + +- `g:autofenc_autodetect_commentexpr` (regular expression, see the settings in + `plugin/AutoFenc.vim`) + + Pattern for detection of encodings specified in a comment. + +- `g:autofenc_autodetect_num_of_lines` (number >= 0, default 5) + + How many lines from the beginning and from the end of the file should be + searched for the possible encoding declaration. + +- `g:autofenc_autodetect_ext_prog` (0 or 1, default 1) + + Enables/disables detection of encoding via an external program (see + additional settings below). + +- `g:autofenc_ext_prog_path` (string, default `'enca'`) + + Path to the external program. It can be either a relative or absolute path. 
+ The external program can take any number of arguments, but the last one has + to be a path to the file for which the encoding is to be detected (it will be + supplied by this plugin). The output of the program has to be the name of the + encoding in which the file is saved (a string on a single line). + +- `g:autofenc_ext_prog_args` (string, default `'-i -L czech'`) + + Additional program arguments (can be none, i.e. `''`). + +- `g:autofenc_ext_prog_unknown_fenc` (string, default `'???'`) + + If the output of the external program is this string, then it means that the + file encoding was not detected successfully. The string has to be + case-sensitive. + +- `g:autofenc_enc_blacklist` (regular expression, default `''`) + + If the detected encoding matches this regular expression, it will be ignored. + +Requirements +------------ + +The `filetype` plugin has to be enabled (a line like `filetype plugin on` +has to be in your `$HOME/.vimrc` (Linux-like systems) or `%UserProfile%\_vimrc` +(Windows systems)). + +Notes +----- + +This script is by no means perfect, but it works for me and suits my needs +very well, so it might be also useful for you. Your feedback, opinion, +suggestions, bug reports, patches, simply anything you have to say is welcomed! + +There are similar plugins to this one, so if you do not like this one, you can +test these: + +* [FencView.vim](http://www.vim.org/scripts/script.php?script_id=1708): + Mainly supports detection of encodings for Asian languages. +* [MultiEnc.vim](http://www.vim.org/scripts/script.php?script_id=1806): + Obsolete, merged with the previous one. +* [charset.vim](http://www.vim.org/scripts/script.php?script_id=199): + Not very complete/correct and last update in 2002. +* [Detect encoding from the charset specified in HTML + files](http://vim.wikia.com/wiki/Detect_encoding_from_the_charset_specified_in_HTML_files): + Same basic ideas but only for HTML files. + +Let me know if there are others and I will add them here.
+ +License +------- + +Copyright (c) 2009-2014 Petr Zemek + +Distributed under GNU GPLv2: + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. diff --git a/plugin/AutoFenc.vim b/plugin/AutoFenc.vim new file mode 100644 index 0000000..85be858 --- /dev/null +++ b/plugin/AutoFenc.vim @@ -0,0 +1,492 @@ +" File: AutoFenc.vim +" Brief: Tries to automatically detect file encoding. +" Author: Petr Zemek +" Version: 1.5.1 +" +" Description: +" A Vim plugin that tries to automatically detect and set file encoding when +" opening a file. See https://github.com/s3rvac/AutoFenc for more details. +" +" License: +" Copyright (C) 2009-2014 Petr Zemek +" +" This program is free software: you can redistribute it and/or modify it +" under the terms of the GNU General Public License as published by the Free +" Software Foundation, either version 2 of the License, or (at your option) +" any later version. +" +" This program is distributed in the hope that it will be useful, but WITHOUT +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +" FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +" more details. +" +" You should have received a copy of the GNU General Public License +" along with this program. If not, see <http://www.gnu.org/licenses/>. +" + +" Check if the plugin was already loaded. Also, die gracefully if the used Vim +" version is too old.
+if exists('autofenc_loaded') || v:version < 700 + finish +endif +" Make the loaded variable actually useful by including the version number. +let autofenc_loaded = '1.5.1' + +" This plugin uses line continuations, so the 'C' flag is removed from 'cpo' (the previous value is kept in s:oldcpo). +if &cpo =~ 'C' + let s:oldcpo = &cpo + set cpo-=C +endif + +"------------------------------------------------------------------------------- +" Defines the variable whose name is passed in a:var (a string such as +" 'g:autofenc_enable') with a:value, but only when it does not exist yet. +"------------------------------------------------------------------------------- +function s:CheckAndSetVar(var, value) + if !exists(a:var) + exec 'let ' . a:var . ' = ' . string(a:value) + endif +endfunction + +" Default values of the configuration options (README.md describes each of +" them). Options that already exist, e.g. set by the user, are left untouched. +call s:CheckAndSetVar('g:autofenc_enable', 1) +call s:CheckAndSetVar('g:autofenc_emit_messages', 0) +call s:CheckAndSetVar('g:autofenc_max_file_size', 10485760) +call s:CheckAndSetVar('g:autofenc_disable_for_files_matching', '^[-_a-zA-Z0-9]\+://') +call s:CheckAndSetVar('g:autofenc_disable_for_file_types', []) +call s:CheckAndSetVar('g:autofenc_autodetect_bom', (&fileencodings !~# 'ucs-bom')) +call s:CheckAndSetVar('g:autofenc_autodetect_html', 1) +call s:CheckAndSetVar('g:autofenc_autodetect_html_filetypes', '^\(html.*\|xhtml\|aspperl\|aspvbs\|cf\|dtml\|gsp\|jsp\|mason\|php\|plp\|smarty\|spyce\|webmacro\)$') +call s:CheckAndSetVar('g:autofenc_autodetect_xml', 1) +call s:CheckAndSetVar('g:autofenc_autodetect_xml_filetypes', '^\(xml\|xquery\|xsd\|xslt\?\|ant\|dsl\|mxml\|svg\|wsh\|xbl\)$') +call s:CheckAndSetVar('g:autofenc_autodetect_css', 1) +call s:CheckAndSetVar('g:autofenc_autodetect_css_filetypes', '^\(css\|sass\)$') +call s:CheckAndSetVar('g:autofenc_autodetect_comment', 1) +call s:CheckAndSetVar('g:autofenc_autodetect_commentexpr', '\c^\A\(.*\s\)\?\(\(\(file\)\?en\)\?coding\|charset\)[:=]\?\s*\zs[-A-Za-z0-9_]\+') +call s:CheckAndSetVar('g:autofenc_autodetect_num_of_lines',
5) +call s:CheckAndSetVar('g:autofenc_autodetect_ext_prog', 1) +call s:CheckAndSetVar('g:autofenc_ext_prog_path', 'enca') +call s:CheckAndSetVar('g:autofenc_ext_prog_args', '-i -L czech') +call s:CheckAndSetVar('g:autofenc_ext_prog_unknown_fenc', '???') +call s:CheckAndSetVar('g:autofenc_enc_blacklist', '') + +"------------------------------------------------------------------------------- +" Normalizes the selected encoding and returns it, so it can be safely used as +" a new encoding. This function should be called before a new encoding is set. +"------------------------------------------------------------------------------- +function s:NormalizeEncoding(enc) + let nenc = tolower(a:enc) + + " The recent versions of the TOhtml runtime plugin have some nice charset + " to encoding functions which even allow user overrides. Use them if + " available. + let nenc2 = "" + silent! let nenc2 = tohtml#EncodingFromCharset(nenc) + if nenc2 != "" + return nenc2 + " If the TOhtml function is unavailable, at least handle some canonical + " encoding names in Vim. + elseif nenc =~ 'iso[-_]8859-1' + return 'latin1' + elseif nenc =~ 'iso[-_]8859-2' + return 'latin2' + elseif nenc ==? 'gb2312' + return 'cp936' " GB2312 imprecisely means CP936 in HTML + elseif nenc =~ '\(cp\|win\(dows\)\?\)-125\d' + return 'cp125'.nenc[strlen(nenc)-1] + elseif nenc == 'utf8' + return 'utf-8' + elseif g:autofenc_emit_messages && nenc !~ '^\%(8bit-\|2byte-\)\?\%(latin[12]\|utf-8\|utf-16\%(le\)\?\|ucs-[24]\%(le\)\?\|iso-8859-\d\{1,2}\|cp\d\{3,4}\)$' + echomsg 'AutoFenc: detected unrecognized charset, trying fenc='.nenc + endif + + return nenc +endfunction + +"------------------------------------------------------------------------------- +" Sets the selected file encoding. Returns 1 if the file was reloaded, +" 0 otherwise. 
+"-------------------------------------------------------------------------------
+function s:SetFileEncoding(enc)
+	let nenc = s:NormalizeEncoding(a:enc)
+
+	" The name is spliced into an Ex command below and ultimately comes from
+	" the detection functions, so refuse anything that is not a plain encoding
+	" name (letters, digits and -_.:+) rather than risk executing a '|' or
+	" whitespace-separated argument as part of the command.
+	if nenc != "" && nenc !~# '^[-_.:+[:alnum:]]\+$'
+		if g:autofenc_emit_messages
+			echomsg 'AutoFenc: ignoring malformed encoding name'
+		endif
+		return 0
+	endif
+
+	" Check whether we're not trying to set the current file encoding.
+	if nenc != "" && nenc !=? &fenc
+		if exists('&autochdir') && &autochdir
+			" Other autocmds may have changed the window's working directory;
+			" when 'autochdir' is set, the :edit will reset that, so
+			" temporarily disable the setting.
+			let old_autochdir = &autochdir
+			set noautochdir
+		endif
+		try
+			" Set the file encoding and reload it, keeping any user-specified
+			" fileformat, and keeping any bad bytes in case the header is wrong
+			" (this won't let the user save if a conversion error happened on
+			" read).
+			exec 'edit ++enc='.nenc.' ++ff='.&ff.' ++bad=keep'
+		finally
+			" Restore 'autochdir' even when the :edit raised an error.
+			if exists('old_autochdir')
+				let &autochdir = old_autochdir
+			endif
+		endtry
+
+		" The file was reloaded.
+		return 1
+	else
+		" The file was not reloaded.
+		return 0
+	endif
+endfunction
+
+"-------------------------------------------------------------------------------
+" Tries to detect a BOM (byte order mark) at the beginning of the file to
+" detect multibyte encodings. If there is a BOM, it returns the appropriate
+" encoding, otherwise the empty string is returned.
+"-------------------------------------------------------------------------------
+function s:BOMEncodingDetection()
+	" The implementation of this function is based on a part of the
+	" FencsView.vim plugin by Ming Bai
+	" (http://www.vim.org/scripts/script.php?script_id=1708).
+
+	" Get the first line of the file.
+	let file_content = readfile(expand('%:p'), 'b', 1)
+	if file_content == []
+		" Empty file
+		return ''
+	endif
+	let first_line = file_content[0]
+
+	" Collect the values of the first four bytes of the line.
+	" - readfile() replaces every NUL byte with a NL (0x0A). A real NL cannot
+	"   occur inside a line, so 0x0A is mapped back to 0x00.
+	" - A byte that is not there (the line is shorter than four bytes) is
+	"   recorded as -1 so that it can never be confused with a NUL byte.
+	"   Otherwise a UTF-16LE file whose first line holds only the BOM
+	"   (ff fe 0a 00 ...) would be reported as UTF-32LE.
+	let bom = []
+	for idx in range(4)
+		if idx < strlen(first_line)
+			let byte = char2nr(first_line[idx])
+			call add(bom, byte == 0x0A ? 0x00 : byte)
+		else
+			call add(bom, -1)
+		endif
+	endfor
+
+	" Compare the bytes as lists. Comparing concatenated numbers is ambiguous:
+	" for example, the bytes ff 19 28 00 and ff fe 00 00 both concatenate to
+	" '25525400'. The four-byte marks have to be tested before the two-byte
+	" ones because the UTF-32LE mark starts with the UTF-16LE mark.
+	if bom == [0x00, 0x00, 0xfe, 0xff]
+		return 'utf-32'
+	elseif bom == [0xff, 0xfe, 0x00, 0x00]
+		return 'utf-32le'
+	elseif bom[0:2] == [0xef, 0xbb, 0xbf]
+		return 'utf-8'
+	elseif bom[0:1] == [0xfe, 0xff]
+		return 'utf-16'
+	elseif bom[0:1] == [0xff, 0xfe]
+		return 'utf-16le'
+	endif
+
+	" There was no legal BOM.
+	return ''
+endfunction
+
+"-------------------------------------------------------------------------------
+" Tries the HTML way of encoding detection of the current file and returns the
+" detected encoding (or the empty string, if the encoding was not detected).
+"-------------------------------------------------------------------------------
+function s:HTMLEncodingDetection()
+	" This method is based on the meta tag in the head of the HTML document
+	" ()
+
+	" Store the actual position in the file and move to the very first line in
+	" the file.
+	let curpos=getpos('.')
+	keepjumps 1
+
+	let enc = ''
+
+	" NOTE(review): this copy of the patch is truncated on the next line. The
+	" text from the search() pattern up to the file size check of
+	" s:DetectFileEncoding() is missing and must be restored from the original
+	" plugin source; it is kept here exactly as found.
+	let charset_line = search('\c g:autofenc_max_file_size || file_size < 0 ||
+		\ file_path =~ g:autofenc_disable_for_files_matching ||
+		\ index(g:autofenc_disable_for_file_types, &ft, 0, 1) != -1
+		return ''
+	endif
+
+	" The detection methods below are tried in a fixed order and the first
+	" non-empty result wins.
+
+	" BOM encoding detection.
+	if g:autofenc_autodetect_bom
+		let enc = s:BOMEncodingDetection()
+		if enc != ''
+			return enc
+		endif
+	endif
+
+	" HTML encoding detection.
+	if g:autofenc_autodetect_html && &filetype =~? g:autofenc_autodetect_html_filetypes
+		let enc = s:HTMLEncodingDetection()
+		if enc != ''
+			return enc
+		endif
+	endif
+
+	" XML encoding detection.
+	if g:autofenc_autodetect_xml && &filetype =~? g:autofenc_autodetect_xml_filetypes
+		let enc = s:XMLEncodingDetection()
+		if enc != ''
+			return enc
+		endif
+	endif
+
+	" CSS encoding detection.
+	if g:autofenc_autodetect_css && &filetype =~? g:autofenc_autodetect_css_filetypes
+		let enc = s:CSSEncodingDetection()
+		if enc != ''
+			return enc
+		endif
+	endif
+
+	" Comment encoding detection.
+	if g:autofenc_autodetect_comment
+		let enc = s:CommentEncodingDetection()
+		if enc != ''
+			return enc
+		endif
+	endif
+
+	" External program encoding detection.
+	if g:autofenc_autodetect_ext_prog
+		let enc = s:ExtProgEncodingDetection()
+		if enc != ''
+			return enc
+		endif
+	endif
+
+	" The encoding was not detected.
+	return ''
+endfunction
+
+"-------------------------------------------------------------------------------
+" Main plugin function. Tries to autodetect the correct file encoding and sets
+" the detected one (if any). If the ASCII encoding is detected, it does nothing
+" to allow Vim to set its internal encoding instead. Encodings matching
+" g:autofenc_enc_blacklist (when it is not empty) are ignored as well.
+"-------------------------------------------------------------------------------
+function s:DetectAndSetFileEncoding()
+	let enc = s:DetectFileEncoding()
+
+	" Don't call again on the nested trigger from the edit.
+	" NOTE(review): the variable is also left set when no reload happens, so
+	" the BufRead autocommand below only unlets it on the next read of this
+	" buffer and detection runs again on the read after that - confirm that
+	" this is intended.
+	let b:autofenc_done = enc
+
+	if (enc != '') && (enc !=? 'ascii') &&
+		\ (g:autofenc_enc_blacklist == '' || enc !~? g:autofenc_enc_blacklist)
+		if s:SetFileEncoding(enc)
+			if g:autofenc_emit_messages
+				echomsg "AutoFenc: Detected [".enc."] from file, loaded with fenc=".&fenc
+			endif
+		endif
+	endif
+endfunction
+
+" Set the detected file encoding.
+if g:autofenc_enable
+	augroup AutoFenc
+		au!
+		" We need to check that we're not in the middle of a reload due to this
+		" plugin, otherwise we can recurse forever. But unlet the variable to
+		" allow re-detection on the next read of this buffer if it is just
+		" unloaded. Nothing is done when the user gave ++enc explicitly.
+		au BufRead * nested
+			\ if !exists('b:autofenc_done') |
+			\ 	if v:cmdarg !~ '++enc' |
+			\ 		call s:DetectAndSetFileEncoding() |
+			\ 	endif |
+			\ else |
+			\ 	unlet b:autofenc_done |
+			\ endif
+	augroup END
+endif
+
+" Restore line continuations (and the rest of &cpo) when done.
+if exists('s:oldcpo')
+	let &cpo = s:oldcpo
+	unlet s:oldcpo
+endif
+
+" vim: noet