[vim] Add a plugin for detecting indent

2010-10-22 10:03:52 +02:00 · 2010-10-22 10:03:52 +02:00 · c5ab496deb
commit c5ab496deb
parent 81553978aa
4 changed files with 496 additions and 0 deletions
--- a/.vim/filetype.vim
+++ b/.vim/filetype.vim
@ -2,3 +2,8 @@ augroup filetypedetect
    au BufNewFile,BufRead .tmux.conf*,tmux.conf* setf tmux
 augroup END

+
+augroup filetypedetect
+	" Mail
+	autocmd BufRead,BufNewFile *mutt-*              setfiletype mail
+augroup END
--- a/.vim/plugin/indent_finder.py
+++ b/.vim/plugin/indent_finder.py
@ -0,0 +1,478 @@
+# 
+# Indentation finder, by Philippe Fremy <phil at freehackers dot org>
+# Copyright 2002-2008 Philippe Fremy
+#
+# This program is distributed under the BSD license. You should have received
+# a copy of the file LICENSE.txt along with this software.
+#
+
+import sys
+import re
+
+help = \
+"""Usage : %s [ --vim-output ] [ --verbose ] file1 file2 ... fileN
+
+Display indentation used in the list of files. Possible answers are (with X
+being the number of spaces used for indentation):
+space X   
+tab 8
+mixed tab X space Y
+
+mixed means that indentation style is tab at the beginning of the line (tab
+being 8 positions) and then spaces to do the indentation, unless you reach 8
+spaces which are replaced by a tab. This is the vim source file indentation
+for example. In my opinion, this is the worst possible style.
+
+--vim-output: output suitable to use inside vim:
+set sts=0 | set tabstop=4 | set noexpandtab | set shiftwidth=4
+
+"""
+
+VERSION='1.4'
+
+### Used when indentation is tab, to set tabstop in vim
+DEFAULT_TAB_WIDTH = 4
+
+### default values for files where indentation is not meaningful (empty files)
+# possible values:
+# DEFAULT_RESULT = ('space', 4 )
+# DEFAULT_RESULT = ('space', 2 )
+# DEFAULT_RESULT = ('space', 8 )
+# DEFAULT_RESULT = ('tab', DEFAULT_TAB_WIDTH )
+
+DEFAULT_RESULT = ('space', 4 )
+
+VERBOSE_QUIET   = 0
+VERBOSE_INFO    = 1
+VERBOSE_DEBUG   = 2
+VERBOSE_DEEP_DEBUG   = 3
+
+DEFAULT_VERBOSITY = VERBOSE_QUIET
+
+###
+class LineType:
+    NoIndent        = 'NoIndent'
+    SpaceOnly       = 'SpaceOnly'
+    TabOnly         = 'TabOnly'
+    Mixed           = 'Mixed'
+    BeginSpace      = 'BeginSpace'
+
+def info( s ): log( VERBOSE_INFO, s )
+def dbg( s ): log( VERBOSE_DEBUG, s )
+def deepdbg( s ): log( VERBOSE_DEEP_DEBUG, s )
+
+def log( level, s ):
+    if level <= IndentFinder.VERBOSITY:
+        print s
+
+class IndentFinder:
+    """
+    IndentFinder reports the indentation used in a source file. Its approach
+    is not tied to any particular language. It was tested successfully with
+    python, C, C++ and Java code.
+
+    How does it work ?
+
+    It scans each line of the input file for whitespace characters (spaces or
+    tabs) repeated until a non-whitespace character is found. Such a line
+    is considered to be a properly indented line of code. Blank lines and
+    comment lines (starting with # or /* or * ) are ignored. Lines coming
+    after a line ending in '\\' have a higher chance of not being properly
+    indented, and are thus ignored too.
+
+    Only increments in indentation are fed in. Dedentation or maintaining
+    the same indentation is not taken into account when analysing a file. Increment
+    in indentation from zero indentation to some indentation is also ignored because
+    it's wrong in many cases (header file with many structures for example, do not always
+    obey the indentation of the rest of the code).
+
+    Each line is analysed as:
+    - SpaceOnly: indentation of 8 spaces or more
+    - TabOnly: indentation of tab only
+    - Mixed: indentation of tab, then less than 8 spaces
+    - BeginSpace: indentation of less than 8 spaces, that could be either a mixed indentation
+        or a pure space indentation.
+    - non-significant
+
+    Two consecutive significant lines are then considered. The only valid combinations are:
+    - (NoIndent, BeginSpace)    => space or mixed
+    - (NoIndent, TabOnly)       => tab
+    - (BeginSpace, BeginSpace)  => space or mixed
+    - (BeginSpace, SpaceOnly)   => space
+    - (SpaceOnly, SpaceOnly)    => space
+    - (TabOnly, TabOnly)        => tab
+    - (TabOnly, Mixed)          => mixed
+    - (Mixed, TabOnly)          => mixed
+
+    The increment in number of spaces is then recorded.
+
+    At the end, the number of lines with space indentation, mixed space and tab indentation
+    are compared and a decision is made.
+
+    If no decision can be made, DEFAULT_RESULT is returned.
+
+    If IndentFinder ever reports wrong indentation, send me immediately a
+    mail, if possible with the offending file.
+    """
+
+    def __init__(self, default_result=DEFAULT_RESULT):
+        self.clear()
+        self.default_result = default_result
+
+    VERBOSITY = DEFAULT_VERBOSITY
+
+    def parse_file_list( self, file_list ):
+        for fname in file_list:
+            self.parse_file( fname )
+
+    def parse_file( self, fname ):
+        f = open( fname )
+        l = f.readline()
+        while( l ):
+            self.analyse_line( l )
+            l = f.readline()
+        f.close()
+
+    def clear( self ):
+        self.lines = {}
+        for i in range(2,9): self.lines['space%d' % i] = 0
+        for i in range(2,9): self.lines['mixed%d' % i] = 0
+        self.lines['tab'] = 0
+
+        self.nb_processed_lines = 0
+        self.nb_indent_hint = 0
+        self.indent_re  = re.compile( "^([ \t]+)([^ \t]+)" )
+        self.mixed_re  = re.compile(  "^(\t+)( +)$" )
+        self.skip_next_line = False
+        self.previous_line_info = None
+
+    def analyse_line( self, line ):
+        if line[-1:] == '\n':
+            line = line[:-1]
+        deepdbg( 'analyse_line: "%s"' % line.replace(' ', '.' ).replace('\t','\\t') )
+        self.nb_processed_lines += 1
+
+        skip_current_line = self.skip_next_line
+        self.skip_next_line = False
+        if line[-1:] == '\\': 
+            deepdbg( 'analyse_line: Ignoring next line!' )
+            # skip lines after lines ending in \
+            self.skip_next_line = True
+
+        if skip_current_line: 
+            deepdbg( 'analyse_line: Ignoring current line!' )
+            return
+
+        ret = self.analyse_line_indentation( line )
+        if ret:
+            self.nb_indent_hint += 1
+        deepdbg( 'analyse_line: Result of line analysis: %s' % str(ret) )
+        return ret
+
+    def analyse_line_type( self, line ):
+        '''Analyse the type of line and return (LineType, <indentation part of
+        the line>), or (LineType.Mixed, <tab part>, <space part>) for mixed lines.
+
+        The function will reject improperly formatted lines (mixture of tab
+        and space for example) and comment lines by returning None.
+        '''
+        mixed_mode = False
+        tab_part = ''
+        space_part = ''
+
+        if len(line) > 0 and line[0] != ' ' and line[0] != '\t':
+            return (LineType.NoIndent, '') 
+
+        mo = self.indent_re.match( line )
+        if not mo: 
+            deepdbg( 'analyse_line_type: line is not indented' )
+            return None
+
+        indent_part = mo.group(1)
+        text_part = mo.group(2)
+            
+        deepdbg( 'analyse_line_type: indent_part="%s" text_part="%s"' % 
+            (indent_part.replace(' ', '.').replace('\t','\\t').replace('\n', '\\n' ),
+                text_part ) )
+
+        if text_part[0] == '*': 
+            # continuation of a C/C++ comment, unlikely to be indented correctly
+            return None
+
+        if text_part[0:2] == '/*' or text_part[0] == '#': 
+            # python, C/C++ comment, might not be indented correctly
+            return None
+
+        if '\t' in indent_part and ' ' in indent_part:
+            # mixed mode
+            mo = self.mixed_re.match( indent_part )
+            if not mo:
+                # line is not composed of '\t\t\t    ', ignore it
+                return None
+            mixed_mode = True
+            tab_part = mo.group(1)
+            space_part = mo.group(2)
+            
+        if mixed_mode:
+            if len(space_part) >= 8:
+                # this is not mixed mode, this is garbage !
+                return None
+            return (LineType.Mixed, tab_part, space_part )
+
+        if '\t' in indent_part:
+            return (LineType.TabOnly, indent_part)
+
+        if ' ' in indent_part:
+            if len(indent_part) < 8:
+                # this could be mixed mode too
+                return (LineType.BeginSpace, indent_part)
+            else:
+                # this is really a line indented with spaces
+                return (LineType.SpaceOnly, indent_part )
+
+        assert False, 'We should never get there !'
+
+    def analyse_line_indentation( self, line ):
+        previous_line_info = self.previous_line_info
+        current_line_info = self.analyse_line_type( line )
+        self.previous_line_info = current_line_info
+
+        if current_line_info == None or previous_line_info == None:
+            deepdbg('analyse_line_indentation: Not enough line info to analyse line: %s, %s' % (str(previous_line_info), str(current_line_info)))
+            return 
+        
+        t = (previous_line_info[0], current_line_info[0])
+        deepdbg( 'analyse_line_indentation: Indent analysis: %s %s' % t )
+        if (t == (LineType.TabOnly, LineType.TabOnly)
+            or t == (LineType.NoIndent, LineType.TabOnly) ):
+            if len(current_line_info[1]) - len(previous_line_info[1]) == 1 :
+                self.lines['tab'] += 1
+                return 'tab'
+
+        elif (t == (LineType.SpaceOnly, LineType.SpaceOnly)
+              or t == (LineType.BeginSpace, LineType.SpaceOnly)
+              or t == (LineType.NoIndent, LineType.SpaceOnly) ):
+            nb_space = len(current_line_info[1]) - len(previous_line_info[1])
+            if 1 < nb_space <= 8:
+                key = 'space%d' % nb_space 
+                self.lines[key] += 1
+                return key
+
+        elif (t == (LineType.BeginSpace, LineType.BeginSpace)
+              or t == (LineType.NoIndent, LineType.BeginSpace) ):
+            nb_space = len(current_line_info[1]) - len(previous_line_info[1])
+            if 1 < nb_space <= 8:
+                key1 = 'space%d' % nb_space 
+                key2 = 'mixed%d' % nb_space 
+                self.lines[ key1 ] += 1
+                self.lines[ key2 ] += 1
+                return key1
+
+        elif t == (LineType.BeginSpace, LineType.TabOnly):
+            # we assume that mixed indentation used 8 characters tabs
+            if len(current_line_info[1]) == 1:
+                # exactly one tab (more than one tab on the line --> not mixed mode !)
+                nb_space = len(current_line_info[1])*8 - len(previous_line_info[1])
+                if 1 < nb_space <= 8:
+                    key = 'mixed%d' % nb_space
+                    self.lines[ key ] += 1
+                    return key
+
+        elif t == (LineType.TabOnly, LineType.Mixed):
+            tab_part, space_part = tuple(current_line_info[1:3])
+            if len(previous_line_info[1]) == len(tab_part):
+                nb_space = len(space_part)
+                if 1 < nb_space <= 8:
+                    key = 'mixed%d' % nb_space
+                    self.lines[ key ] += 1
+                    return key
+
+        elif t == (LineType.Mixed, LineType.TabOnly):
+            tab_part, space_part = previous_line_info[1:3]
+            if len(tab_part)+1 == len(current_line_info[1]):
+                nb_space = 8-len(space_part)
+                if 1 < nb_space <= 8:
+                    key = 'mixed%d' % nb_space
+                    self.lines[ key ] += 1
+                    return key
+        else:
+            pass
+
+        return None
+        
+    def results( self ):
+        dbg( "Nb of scanned lines : %d" % self.nb_processed_lines )
+        dbg( "Nb of indent hint : %d" % self.nb_indent_hint )
+        dbg( "Collected data:" )
+        for key in self.lines:
+            if self.lines[key] > 0:
+                dbg( '%s: %d' % (key, self.lines[key] ) )
+
+        max_line_space = max( [ self.lines['space%d'%i] for i in range(2,9) ] )
+        max_line_mixed = max( [ self.lines['mixed%d'%i] for i in range(2,9) ] )
+        max_line_tab = self.lines['tab']
+
+        dbg( 'max_line_space: %d' % max_line_space )
+        dbg( 'max_line_mixed: %d' % max_line_mixed )
+        dbg( 'max_line_tab: %d' % max_line_tab )
+
+        ### Result analysis
+        #
+        # 1. Space indented file
+        #    - lines indented with less than 8 space will fill mixed and space array
+        #    - lines indented with 8 space or more will fill only the space array
+        #    - almost no lines indented with tab
+        #
+        # => more lines with space than lines with mixed
+        # => a lot more lines with space than tab
+        #
+        # 2. Tab indented file
+        #    - most lines will be tab only
+        #    - very few lines as mixed
+        #    - very few lines as space only
+        #
+        # => a lot more lines with tab than lines with mixed
+        # => a lot more lines with tab than lines with space
+        #
+        # 3. Mixed tab/space indented file
+        #    - some lines are tab-only (lines with exactly 8 step indentation)
+        #    - some lines are space only (less than 8 space)
+        #    - all other lines are mixed
+        #
+        # If mixed is tab + 2 space indentation:
+        #     - a lot more lines with mixed than with tab
+        # If mixed is tab + 4 space indentation
+        #     - as many lines with mixed as with tab
+        #
+        # If no lines exceed 8 space, there will be only lines with space
+        # and tab but no lines with mixed. Impossible to detect mixed indentation
+        # in this case, the file looks like it's actually indented as space only
+        # and will be detected so.
+        #
+        # => same or more lines with mixed than lines with tab only
+        # => same or more lines with mixed than lines with space only
+        #
+
+
+        result = None
+
+        # Detect space indented file
+        if max_line_space >= max_line_mixed and max_line_space > max_line_tab:
+            nb = 0
+            indent_value = None
+            for i in range(8,1,-1):
+                if self.lines['space%d'%i] > int( nb * 1.1 ) : # give a 10% threshold
+                    indent_value = i
+                    nb = self.lines[ 'space%d' % indent_value ]
+
+            if indent_value == None: # no lines
+                result = self.default_result
+            else:
+                result = ('space', indent_value )
+
+        # Detect tab files
+        elif max_line_tab > max_line_mixed and max_line_tab > max_line_space:
+            result = ('tab', DEFAULT_TAB_WIDTH )
+
+        # Detect mixed files
+        elif max_line_mixed >= max_line_tab and max_line_mixed > max_line_space:
+            nb = 0
+            indent_value = None
+            for i in range(8,1,-1):
+                if self.lines['mixed%d'%i] > int( nb * 1.1 ) : # give a 10% threshold
+                    indent_value = i
+                    nb = self.lines[ 'mixed%d' % indent_value ]
+
+            if indent_value == None: # no lines
+                result = self.default_result
+            else:
+                result = ('mixed', (8,indent_value) )
+
+        else:
+            # not enough information to make a decision
+            result = self.default_result
+
+        info( "Result: %s" % str( result ) )
+        return result
+
+    def __str__ (self):
+        itype, ival = self.results()
+        if itype != 'mixed':
+            return '%s %d' % (itype, ival)
+        else:
+            itab, ispace = ival
+            return '%s tab %d space %d' % (itype, itab, ispace)
+        
+
+    def vim_output( self ):
+        result = self.results()
+        indent_type, n = result
+        if indent_type == "space":
+            # spaces: 
+            #   => set sts to the number of spaces
+            #   => set tabstop to the number of spaces
+            #   => expand tabs to spaces
+            #   => set shiftwidth to the number of spaces
+            return "set sts=%d | set tabstop=%d | set expandtab | set shiftwidth=%d \" (%s %d)" % (n,n,n,indent_type,n)
+
+        elif indent_type == "tab":
+            # tab:
+            #   => set sts to 0
+            #   => set tabstop to preferred value
+            #   => set expandtab to false
+            #   => set shiftwidth to tabstop
+            return "set sts=0 | set tabstop=%d | set noexpandtab | set shiftwidth=%d \" (%s)" % (DEFAULT_TAB_WIDTH, DEFAULT_TAB_WIDTH, indent_type )
+
+        if indent_type == 'mixed':
+            tab_indent, space_indent = n
+            # mixed:
+            #   => set sts to 4
+            #   => set tabstop to tab_indent
+            #   => set expandtab to false
+            #   => set shiftwidth to space_indent
+            return "set sts=4 | set tabstop=%d | set noexpandtab | set shiftwidth=%d \" (%s %d)" % (tab_indent, space_indent, indent_type, space_indent )
+
+
+
+def main():
+    VIM_OUTPUT = 0
+
+    file_list = []
+    for opt in sys.argv[1:]:
+        if opt == "--vim-output": 
+            VIM_OUTPUT = 1
+        elif opt == "--verbose" or opt == '-v': 
+            IndentFinder.VERBOSITY += 1
+        elif opt == "--version": 
+            print 'IndentFinder v%s' % VERSION
+            return
+        elif opt[0] == "-": 
+            print help % sys.argv[0]
+            return
+        else:
+            file_list.append( opt )
+
+    fi = IndentFinder()
+
+    if len(file_list) > 1:
+        # multiple files
+        for fname in file_list:
+            fi.clear()
+            fi.parse_file( fname )
+            if VIM_OUTPUT:
+                print "%s : %s" % (fname, fi.vim_output())
+            else:
+                print "%s : %s" % (fname, str(fi))
+        return
+
+    else:
+        # only one file, don't print filename
+        fi.parse_file_list( file_list )
+        if VIM_OUTPUT:
+            sys.stdout.write( fi.vim_output() )
+        else:
+            print str(fi)
+
+
+if __name__ == "__main__":
+    main()
--- a/.vim/plugin/indent_finder.vim
+++ b/.vim/plugin/indent_finder.vim
@ -0,0 +1,10 @@
+
+augroup IndentFinder
+    au! IndentFinder
+    au BufRead *.* let b:indent_finder_result = system('python -c "import indent_finder; indent_finder.main()" --vim-output "' . expand('%') . '"' )
+    au BufRead *.* execute b:indent_finder_result
+
+    " Uncomment the next line to see which indentation is applied on all your loaded files
+    " au BufRead *.* echo "Indent Finder: " . b:indent_finder_result
+augroup End
+
--- a/.vimrc
+++ b/.vimrc
@ -173,6 +173,9 @@ set number
 if has("autocmd")
 	" enable file type detection and do language-dependent indenting
 	filetype plugin indent on
+	" detect indentation see http://www.freehackers.org/Indent_Finder
+	autocmd BufReadPost * execute system ('python ~/.vim/plugin/indent_finder.py --vim-output "' . expand('%') . '"' )
+
 else
 	" auto-indent
 	set autoindent