From c5ab496debb00cb0ed04c0e7d168c017217f7274 Mon Sep 17 00:00:00 2001
From: Mathieu Maret
Date: Fri, 22 Oct 2010 10:03:52 +0200
Subject: [PATCH] [vim] Add a plugin for detecting indent

---
 .vim/filetype.vim             |   5 +
 .vim/plugin/indent_finder.py  | 489 ++++++++++++++++++++++++++++++++++
 .vim/plugin/indent_finder.vim |  10 +
 .vimrc                        |   3 +
 4 files changed, 507 insertions(+)
 create mode 100755 .vim/plugin/indent_finder.py
 create mode 100755 .vim/plugin/indent_finder.vim

diff --git a/.vim/filetype.vim b/.vim/filetype.vim
index ef1af32..32df5c9 100644
--- a/.vim/filetype.vim
+++ b/.vim/filetype.vim
@@ -2,3 +2,8 @@ augroup filetypedetect
     au BufNewFile,BufRead .tmux.conf*,tmux.conf* setf tmux
 augroup END

+
+augroup filetypedetect
+    " Mail
+    autocmd BufRead,BufNewFile *mutt-* setfiletype mail
+augroup END
diff --git a/.vim/plugin/indent_finder.py b/.vim/plugin/indent_finder.py
new file mode 100755
index 0000000..f98531b
--- /dev/null
+++ b/.vim/plugin/indent_finder.py
@@ -0,0 +1,489 @@
+#
+# Indentation finder, by Philippe Fremy
+# Copyright 2002-2008 Philippe Fremy
+#
+# This program is distributed under the BSD license. You should have received
+# a copy of the file LICENSE.txt along with this software.
+#
+
+import sys
+import re
+
+help = \
+"""Usage : %s [ --vim-output ] [ --verbose ] file1 file2 ... fileN
+
+Display indentation used in the list of files. Possible answers are (with X
+being the number of spaces used for indentation):
+space X
+tab 8
+mixed tab X space Y
+
+mixed means that indentation style is tab at the beginning of the line (tab
+being 8 positions) and then spaces to do the indentation, unless you reach 8
+spaces which are replaced by a tab. This is the vim source file indentation
+for example. In my opinion, this is the worst possible style.
+
+--vim-output: output suitable to use inside vim:
+set sts=0 | set tabstop=4 | set noexpandtab | set shiftwidth=4
+
+"""
+
+VERSION='1.4'
+
+### Used when indentation is tab, to set tabstop in vim
+DEFAULT_TAB_WIDTH = 4
+
+### default values for files where indentation is not meaningful (empty files)
+# possible values:
+# DEFAULT_RESULT = ('space', 4 )
+# DEFAULT_RESULT = ('space', 2 )
+# DEFAULT_RESULT = ('space', 8 )
+# DEFAULT_RESULT = ('tab', DEFAULT_TAB_WIDTH )
+
+DEFAULT_RESULT = ('space', 4 )
+
+VERBOSE_QUIET = 0
+VERBOSE_INFO = 1
+VERBOSE_DEBUG = 2
+VERBOSE_DEEP_DEBUG = 3
+
+DEFAULT_VERBOSITY = VERBOSE_QUIET
+
+###
+class LineType:
+    """Names of the leading-whitespace categories returned by analyse_line_type()."""
+    NoIndent = 'NoIndent'
+    SpaceOnly = 'SpaceOnly'
+    TabOnly = 'TabOnly'
+    Mixed = 'Mixed'
+    BeginSpace = 'BeginSpace'
+
+def info( s ): log( VERBOSE_INFO, s )
+def dbg( s ): log( VERBOSE_DEBUG, s )
+def deepdbg( s ): log( VERBOSE_DEEP_DEBUG, s )
+
+def log( level, s ):
+    """Print s when level does not exceed IndentFinder.VERBOSITY."""
+    if level <= IndentFinder.VERBOSITY:
+        print s
+
+class IndentFinder:
+    """
+    IndentFinder reports the indentation used in a source file. Its approach
+    is not tied to any particular language. It was tested successfully with
+    python, C, C++ and Java code.
+
+    How does it work ?
+
+    It scans each line of the entry file for a space character (white space or
+    tab) repeated until a non space character is found. Such a line
+    is considered to be a properly indented line of code. Blank lines and
+    comments line (starting with # or /* or * ) are ignored. Lines coming
+    after a line ending in '\\' have higher chance of being not properly
+    indented, and are thus ignored too.
+
+    Only the increment in indentation are fed in. Dedentation or maintaining
+    the same indentation is not taken into account when analysing a file. Increment
+    in indentation from zero indentation to some indentation is also ignored because
+    it's wrong in many cases (header file with many structures for example, do not always
+    obey the indentation of the rest of the code).
+
+    Each line is analysed as:
+    - SpaceOnly: indentation of more than 8 space
+    - TabOnly: indentation of tab only
+    - Mixed: indentation of tab, then less than 8 spaces
+    - BeginSpace: indentation of less than 8 space, that could be either a mixed indentation
+        or a pure space indentation.
+    - non-significant
+
+    Then two consecutive significant lines are then considered. The only valid combinations are:
+    - (NoIndent, BeginSpace) => space or mixed
+    - (NoIndent, Tab) => tab
+    - (BeginSpace, BeginSpace) => space or mixed
+    - (BeginSpace, SpaceOnly) => space
+    - (SpaceOnly, SpaceOnly) => space
+    - (TabOnly, TabOnly) => tab
+    - (TabOnly, Mixed) => mixed
+    - (Mixed, TabOnly) => mixed
+
+    The increment in number of spaces is then recorded.
+
+    At the end, the number of lines with space indentation, mixed space and tab indentation
+    are compared and a decision is made.
+
+    If no decision can be made, DEFAULT_RESULT is returned.
+
+    If IndentFinder ever reports wrong indentation, send me immediately a
+    mail, if possible with the offending file.
+    """
+
+    def __init__(self, default_result=DEFAULT_RESULT):
+        self.clear()
+        self.default_result = default_result
+
+    VERBOSITY = DEFAULT_VERBOSITY
+
+    def parse_file_list( self, file_list ):
+        """Analyse every file of file_list, accumulating into the same counters."""
+        for fname in file_list:
+            self.parse_file( fname )
+
+    def parse_file( self, fname ):
+        """Feed each line of the file named fname to analyse_line(); always closes the file."""
+        f = open( fname )
+        try:
+            for l in f:
+                self.analyse_line( l )
+        finally:
+            f.close()
+
+    def clear( self ):
+        """Reset the hint counters and the per-line state, and (re)compile the regexps."""
+        self.lines = {}
+        for i in range(2,9): self.lines['space%d' % i] = 0
+        for i in range(2,9): self.lines['mixed%d' % i] = 0
+        self.lines['tab'] = 0
+
+        self.nb_processed_lines = 0
+        self.nb_indent_hint = 0
+        self.indent_re = re.compile( "^([ \t]+)([^ \t]+)" )
+        self.mixed_re = re.compile( "^(\t+)( +)$" )
+        self.skip_next_line = False
+        self.previous_line_info = None
+
+    def analyse_line( self, line ):
+        """Analyse one line; return the hint key recorded for it, or None when there is none."""
+        if line[-1:] == '\n':
+            line = line[:-1]
+        deepdbg( 'analyse_line: "%s"' % line.replace(' ', '.' ).replace('\t','\\t') )
+        self.nb_processed_lines += 1
+
+        skip_current_line = self.skip_next_line
+        self.skip_next_line = False
+        if line[-1:] == '\\':
+            deepdbg( 'analyse_line: Ignoring next line!' )
+            # skip lines after lines ending in \
+            self.skip_next_line = True
+
+        if skip_current_line:
+            deepdbg( 'analyse_line: Ignoring current line!' )
+            return
+
+        ret = self.analyse_line_indentation( line )
+        if ret:
+            self.nb_indent_hint += 1
+        deepdbg( 'analyse_line: Result of line analysis: %s' % str(ret) )
+        return ret
+
+    def analyse_line_type( self, line ):
+        '''Analyse the type of line and return (LineType, <indentation part of
+        the line>).
+
+        The function will reject improperly formatted lines (mixture of tab
+        and space for example) and comment lines.
+        '''
+        mixed_mode = False
+        tab_part = ''
+        space_part = ''
+
+        if len(line) > 0 and line[0] != ' ' and line[0] != '\t':
+            return (LineType.NoIndent, '')
+
+        mo = self.indent_re.match( line )
+        if not mo:
+            deepdbg( 'analyse_line_type: line is not indented' )
+            return None
+
+        indent_part = mo.group(1)
+        text_part = mo.group(2)
+
+        deepdbg( 'analyse_line_type: indent_part="%s" text_part="%s"' %
+            (indent_part.replace(' ', '.').replace('\t','\\t').replace('\n', '\\n' ),
+            text_part ) )
+
+        if text_part[0] == '*':
+            # continuation of a C/C++ comment, unlikely to be indented correctly
+            return None
+
+        if text_part[0:2] == '/*' or text_part[0] == '#':
+            # python, C/C++ comment, might not be indented correctly
+            return None
+
+        if '\t' in indent_part and ' ' in indent_part:
+            # mixed mode
+            mo = self.mixed_re.match( indent_part )
+            if not mo:
+                # line is not composed of '\t\t\t ', ignore it
+                return None
+            mixed_mode = True
+            tab_part = mo.group(1)
+            space_part = mo.group(2)
+
+        if mixed_mode:
+            if len(space_part) >= 8:
+                # this is not mixed mode, this is garbage !
+                return None
+            return (LineType.Mixed, tab_part, space_part )
+
+        if '\t' in indent_part:
+            return (LineType.TabOnly, indent_part)
+
+        if ' ' in indent_part:
+            if len(indent_part) < 8:
+                # this could be mixed mode too
+                return (LineType.BeginSpace, indent_part)
+            else:
+                # this is really a line indented with spaces
+                return (LineType.SpaceOnly, indent_part )
+
+        assert False, 'We should never get there !'
+
+    def analyse_line_indentation( self, line ):
+        """Compare line with the previous analysed line and count the indentation step, if any."""
+        previous_line_info = self.previous_line_info
+        current_line_info = self.analyse_line_type( line )
+        self.previous_line_info = current_line_info
+
+        if current_line_info is None or previous_line_info is None:
+            deepdbg('analyse_line_indentation: Not enough line info to analyse line: %s, %s' % (str(previous_line_info), str(current_line_info)))
+            return
+
+        t = (previous_line_info[0], current_line_info[0])
+        deepdbg( 'analyse_line_indentation: Indent analysis: %s %s' % t )
+        if (t == (LineType.TabOnly, LineType.TabOnly)
+            or t == (LineType.NoIndent, LineType.TabOnly) ):
+            if len(current_line_info[1]) - len(previous_line_info[1]) == 1 :
+                self.lines['tab'] += 1
+                return 'tab'
+
+        elif (t == (LineType.SpaceOnly, LineType.SpaceOnly)
+            or t == (LineType.BeginSpace, LineType.SpaceOnly)
+            or t == (LineType.NoIndent, LineType.SpaceOnly) ):
+            nb_space = len(current_line_info[1]) - len(previous_line_info[1])
+            if 1 < nb_space <= 8:
+                key = 'space%d' % nb_space
+                self.lines[key] += 1
+                return key
+
+        elif (t == (LineType.BeginSpace, LineType.BeginSpace)
+            or t == (LineType.NoIndent, LineType.BeginSpace) ):
+            nb_space = len(current_line_info[1]) - len(previous_line_info[1])
+            if 1 < nb_space <= 8:
+                key1 = 'space%d' % nb_space
+                key2 = 'mixed%d' % nb_space
+                self.lines[ key1 ] += 1
+                self.lines[ key2 ] += 1
+                return key1
+
+        elif t == (LineType.BeginSpace, LineType.TabOnly):
+            # we assume that mixed indentation used 8 characters tabs
+            if len(current_line_info[1]) == 1:
+                # more than one tab on the line --> not mixed mode !
+                nb_space = len(current_line_info[1])*8 - len(previous_line_info[1])
+                if 1 < nb_space <= 8:
+                    key = 'mixed%d' % nb_space
+                    self.lines[ key ] += 1
+                    return key
+
+        elif t == (LineType.TabOnly, LineType.Mixed):
+            tab_part, space_part = tuple(current_line_info[1:3])
+            if len(previous_line_info[1]) == len(tab_part):
+                nb_space = len(space_part)
+                if 1 < nb_space <= 8:
+                    key = 'mixed%d' % nb_space
+                    self.lines[ key ] += 1
+                    return key
+
+        elif t == (LineType.Mixed, LineType.TabOnly):
+            tab_part, space_part = previous_line_info[1:3]
+            if len(tab_part)+1 == len(current_line_info[1]):
+                nb_space = 8-len(space_part)
+                if 1 < nb_space <= 8:
+                    key = 'mixed%d' % nb_space
+                    self.lines[ key ] += 1
+                    return key
+        else:
+            pass
+
+        return None
+
+    def results( self ):
+        """Return ('space', n), ('tab', DEFAULT_TAB_WIDTH), ('mixed', (8, n)) or self.default_result."""
+        dbg( "Nb of scanned lines : %d" % self.nb_processed_lines )
+        dbg( "Nb of indent hint : %d" % self.nb_indent_hint )
+        dbg( "Collected data:" )
+        for key in self.lines:
+            if self.lines[key] > 0:
+                dbg( '%s: %d' % (key, self.lines[key] ) )
+
+        max_line_space = max( [ self.lines['space%d'%i] for i in range(2,9) ] )
+        max_line_mixed = max( [ self.lines['mixed%d'%i] for i in range(2,9) ] )
+        max_line_tab = self.lines['tab']
+
+        dbg( 'max_line_space: %d' % max_line_space )
+        dbg( 'max_line_mixed: %d' % max_line_mixed )
+        dbg( 'max_line_tab: %d' % max_line_tab )
+
+        ### Result analysis
+        #
+        # 1. Space indented file
+        #    - lines indented with less than 8 space will fill mixed and space array
+        #    - lines indented with 8 space or more will fill only the space array
+        #    - almost no lines indented with tab
+        #
+        # => more lines with space than lines with mixed
+        # => more a lot more lines with space than tab
+        #
+        # 2. Tab indented file
+        #    - most lines will be tab only
+        #    - very few lines as mixed
+        #    - very few lines as space only
+        #
+        # => a lot more lines with tab than lines with mixed
+        # => a lot more lines with tab than lines with space
+        #
+        # 3. Mixed tab/space indented file
+        #    - some lines are tab-only (lines with exactly 8 step indentation)
+        #    - some lines are space only (less than 8 space)
+        #    - all other lines are mixed
+        #
+        # If mixed is tab + 2 space indentation:
+        #    - a lot more lines with mixed than with tab
+        # If mixed is tab + 4 space indentation
+        #    - as many lines with mixed than with tab
+        #
+        # If no lines exceed 8 space, there will be only lines with space
+        # and tab but no lines with mixed. Impossible to detect mixed indentation
+        # in this case, the file looks like it's actually indented as space only
+        # and will be detected so.
+        #
+        # => same or more lines with mixed than lines with tab only
+        # => same or more lines with mixed than lines with space only
+        #
+
+
+        result = None
+
+        # Detect space indented file
+        if max_line_space >= max_line_mixed and max_line_space > max_line_tab:
+            nb = 0
+            indent_value = None
+            for i in range(8,1,-1):
+                if self.lines['space%d'%i] > int( nb * 1.1 ) : # give a 10% threshold
+                    indent_value = i
+                    nb = self.lines[ 'space%d' % indent_value ]
+
+            if indent_value is None: # no lines
+                result = self.default_result
+            else:
+                result = ('space', indent_value )
+
+        # Detect tab files
+        elif max_line_tab > max_line_mixed and max_line_tab > max_line_space:
+            result = ('tab', DEFAULT_TAB_WIDTH )
+
+        # Detect mixed files
+        elif max_line_mixed >= max_line_tab and max_line_mixed > max_line_space:
+            nb = 0
+            indent_value = None
+            for i in range(8,1,-1):
+                if self.lines['mixed%d'%i] > int( nb * 1.1 ) : # give a 10% threshold
+                    indent_value = i
+                    nb = self.lines[ 'mixed%d' % indent_value ]
+
+            if indent_value is None: # no lines
+                result = self.default_result
+            else:
+                result = ('mixed', (8,indent_value) )
+
+        else:
+            # not enough information to make a decision
+            result = self.default_result
+
+        info( "Result: %s" % str( result ) )
+        return result
+
+    def __str__ (self):
+        """Format results() as '<type> <n>', or 'mixed tab <n> space <m>' for mixed files."""
+        itype, ival = self.results()
+        if itype != 'mixed':
+            return '%s %d' % (itype, ival)
+        else:
+            itab, ispace = ival
+            return '%s tab %d space %d' % (itype, itab, ispace)
+
+
+    def vim_output( self ):
+        """Return the vim 'set ...' command line that matches results()."""
+        result = self.results()
+        indent_type, n = result
+        if indent_type == "space":
+            # spaces:
+            # => set sts to the number of spaces
+            # => set tabstop to the number of spaces
+            # => expand tabs to spaces
+            # => set shiftwidth to the number of spaces
+            return "set sts=%d | set tabstop=%d | set expandtab | set shiftwidth=%d \" (%s %d)" % (n,n,n,indent_type,n)
+
+        elif indent_type == "tab":
+            # tab:
+            # => set sts to 0
+            # => set tabstop to preferred value
+            # => set expandtab to false
+            # => set shiftwidth to tabstop
+            return "set sts=0 | set tabstop=%d | set noexpandtab | set shiftwidth=%d \" (%s)" % (DEFAULT_TAB_WIDTH, DEFAULT_TAB_WIDTH, indent_type )
+
+        if indent_type == 'mixed':
+            tab_indent, space_indent = n
+            # mixed:
+            # => set sts to 4 (hard-coded in the command below)
+            # => set tabstop to tab_indent
+            # => set expandtab to false
+            # => set shiftwidth to space_indent
+            return "set sts=4 | set tabstop=%d | set noexpandtab | set shiftwidth=%d \" (%s %d)" % (tab_indent, space_indent, indent_type, space_indent )
+
+
+
+def main():
+    """Command-line entry point: parse sys.argv[1:] and print the detected indentation."""
+    VIM_OUTPUT = 0
+
+    file_list = []
+    for opt in sys.argv[1:]:
+        if opt == "--vim-output":
+            VIM_OUTPUT = 1
+        elif opt == "--verbose" or opt == '-v':
+            IndentFinder.VERBOSITY += 1
+        elif opt == "--version":
+            print 'IndentFinder v%s' % VERSION
+            return
+        elif opt.startswith( "-" ):
+            print help % sys.argv[0]
+            return
+        elif opt: # skip empty arguments; opt[0] used to raise IndexError on them
+            file_list.append( opt )
+
+    fi = IndentFinder()
+
+    if len(file_list) > 1:
+        # multiple files
+        for fname in file_list:
+            fi.clear()
+            fi.parse_file( fname )
+            if VIM_OUTPUT:
+                print "%s : %s" % (fname, fi.vim_output())
+            else:
+                print "%s : %s" % (fname, str(fi))
+        return
+
+    else:
+        # only one file, don't print filename
+        fi.parse_file_list( file_list )
+        if VIM_OUTPUT:
+            sys.stdout.write( fi.vim_output() )
+        else:
+            print str(fi)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.vim/plugin/indent_finder.vim b/.vim/plugin/indent_finder.vim
new file mode 100755
index 0000000..c48d2c1
--- /dev/null
+++ b/.vim/plugin/indent_finder.vim
@@ -0,0 +1,10 @@
+
+augroup IndentFinder
+    au! IndentFinder
+    au BufRead *.* let b:indent_finder_result = system('python -c "import indent_finder; indent_finder.main()" --vim-output ' . shellescape(expand('%')))
+    au BufRead *.* execute b:indent_finder_result
+
+    " Uncomment the next line to see which indentation is applied on all your loaded files
+    " au BufRead *.* echo "Indent Finder: " . b:indent_finder_result
+augroup End
+
diff --git a/.vimrc b/.vimrc
index 61b5af9..cb06fc9 100644
--- a/.vimrc
+++ b/.vimrc
@@ -173,6 +173,9 @@ set number
 if has("autocmd")
     " enable file type detection and do language-dependent indenting
     filetype plugin indent on
+    " detect indentation see http://www.freehackers.org/Indent_Finder
+    autocmd BufReadPost * execute system('python ~/.vim/plugin/indent_finder.py --vim-output ' . shellescape(expand('%')))
+
 else
     " auto-indent
     set autoindent