2010-10-22 10:03:52 +02:00
#
# Indentation finder, by Philippe Fremy <phil at freehackers dot org>
# Copyright 2002-2008 Philippe Fremy
#
# This program is distributed under the BSD license. You should have received
# a copy of the file LICENSE.txt along with this software.
#
import sys
import re
2018-03-07 17:15:11 +01:00
import argparse
2010-10-22 10:03:52 +02:00
# Legacy usage text; the single %s placeholder takes the program name.
# It is not referenced by the code visible in this file. The name shadows
# the ``help`` builtin but is kept because it is part of the module namespace.
help = (
    """Usage : %s [ --vim-output ] [ --verbose ] file1 file2 ... fileN

Display indentation used in the list of files. Possible answers are (with X
being the number of spaces used for indentation):
space X
tab 8
mixed tab X space Y

mixed means that indentation style is tab at the beginning of the line (tab
being 8 positions) and then spaces to do the indentation, unless you reach 8
spaces which are replaced by a tab. This is the vim source file indentation
for example. In my opinion, this is the worst possible style.

--vim-output: output suitable to use inside vim:
set sts=0 | set tabstop=4 | set noexpandtab | set shiftwidth=4

"""
)
VERSION = '1.4'

# Tab width reported for tab-indented files; also used for vim's tabstop
# and shiftwidth in the 'tab' case.
DEFAULT_TAB_WIDTH = 8

# Answer returned for files where indentation is not meaningful (for
# example empty files). Other sensible choices:
#   ('space', 4), ('space', 2), ('tab', DEFAULT_TAB_WIDTH)
DEFAULT_RESULT = ('space', 8)

# Verbosity levels, from silent to most detailed.
VERBOSE_QUIET, VERBOSE_INFO, VERBOSE_DEBUG, VERBOSE_DEEP_DEBUG = range(4)
DEFAULT_VERBOSITY = VERBOSE_QUIET

# Cap on the number of lines IndentFinder.parse_file() examines per file.
MAX_LINE = 1024
2010-10-22 10:03:52 +02:00
###
class LineType:
    """Symbolic names for the indentation category of a single line.

    The values are plain strings so they can be compared directly and
    printed in debug traces.
    """

    # First character is neither a space nor a tab.
    NoIndent = 'NoIndent'
    # Indented with spaces only, 8 or more of them.
    SpaceOnly = 'SpaceOnly'
    # Indented with tabs only.
    TabOnly = 'TabOnly'
    # Indented with tabs followed by fewer than 8 spaces.
    Mixed = 'Mixed'
    # Indented with fewer than 8 spaces (pure-space or mixed style).
    BeginSpace = 'BeginSpace'
def info(s):
    """Emit message *s* at VERBOSE_INFO level."""
    log(VERBOSE_INFO, s)
def dbg(s):
    """Emit message *s* at VERBOSE_DEBUG level."""
    log(VERBOSE_DEBUG, s)
def deepdbg(s):
    """Emit message *s* at VERBOSE_DEEP_DEBUG level."""
    log(VERBOSE_DEEP_DEBUG, s)
def log(level, s):
    """Print *s* unless *level* is above IndentFinder.VERBOSITY."""
    if level > IndentFinder.VERBOSITY:
        return
    print(s)
2010-10-22 10:03:52 +02:00
class IndentFinder:
    """
    IndentFinder reports the indentation used in a source file. Its approach
    is not tied to any particular language. It was tested successfully with
    python, C, C++ and Java code.

    How does it work ?

    It scans each line of the entry file for a space character (white space or
    tab) repeated until a non space character is found. Such a line
    is considered to be a properly indented line of code. Blank lines and
    comment lines (starting with # or /* or *) are ignored. Lines coming
    after a line ending in '\\' have higher chance of being not properly
    indented, and are thus ignored too.

    Only increments in indentation between two consecutive significant lines
    are recorded. Dedentation or maintaining the same indentation is not taken
    into account when analysing a file.

    Each line is analysed as:
    - NoIndent: the line starts with a non-blank character
    - SpaceOnly: indentation of 8 spaces or more
    - TabOnly: indentation of tab only
    - Mixed: indentation of tab, then less than 8 spaces
    - BeginSpace: indentation of less than 8 spaces, that could be either a
      mixed indentation or a pure space indentation.
    - non-significant

    Then two consecutive significant lines are considered. The combinations
    that produce a hint are:
    - (NoIndent, TabOnly)       => tab
    - (TabOnly, TabOnly)        => tab
    - (NoIndent, SpaceOnly)     => space
    - (BeginSpace, SpaceOnly)   => space
    - (SpaceOnly, SpaceOnly)    => space
    - (NoIndent, BeginSpace)    => space or mixed
    - (BeginSpace, BeginSpace)  => space or mixed
    - (BeginSpace, TabOnly)     => mixed
    - (TabOnly, Mixed)          => mixed
    - (Mixed, TabOnly)          => mixed

    The increment in number of spaces is then recorded.

    At the end, the number of lines with space indentation, mixed space and
    tab indentation are compared and a decision is made.

    If no decision can be made, the default result (DEFAULT_RESULT unless
    another one was given to the constructor) is returned.

    If IndentFinder ever reports wrong indentation, send me immediately a
    mail, if possible with the offending file.
    """

    # Threshold read by the module-level log() helper.
    VERBOSITY = DEFAULT_VERBOSITY

    def __init__(self, default_result=DEFAULT_RESULT):
        """Create a finder; *default_result* is returned when undecided."""
        self.clear()
        self.default_result = default_result

    def parse_file_list(self, file_list):
        """Feed every file of *file_list* into the same set of counters."""
        for fname in file_list:
            self.parse_file(fname)

    def parse_file(self, fname):
        """Analyse at most the first MAX_LINE lines of the file *fname*.

        The file is opened in text mode with the platform default encoding;
        errors from open() or from decoding propagate to the caller.
        """
        # 'with' guarantees the handle is closed even if analysis raises.
        with open(fname) as f:
            for line_number, line in enumerate(f):
                if line_number >= MAX_LINE:
                    break
                self.analyse_line(line)

    def clear(self):
        """Reset all counters and per-line state before analysing a new file."""
        # Hint counters: 'space2'..'space8', 'mixed2'..'mixed8', then 'tab'.
        self.lines = {}
        for kind in ('space', 'mixed'):
            for width in range(2, 9):
                self.lines['%s%d' % (kind, width)] = 0
        self.lines['tab'] = 0

        self.nb_processed_lines = 0
        self.nb_indent_hint = 0

        # Leading run of blanks followed by the first run of non-blanks.
        self.indent_re = re.compile("^([ \t]+)([^ \t]+)")
        # Indentation made of tabs followed by spaces, nothing else.
        self.mixed_re = re.compile("^(\t+)( +)$")

        self.skip_next_line = False
        self.previous_line_info = None

    def analyse_line(self, line):
        """Analyse one raw line and return the hint key it produced, if any.

        Returns the counter key ('tab', 'spaceN' or 'mixedN') when the line
        yielded an indentation hint, otherwise None.
        """
        if line[-1:] == '\n':
            line = line[:-1]
        deepdbg('analyse_line: "%s"' % line.replace(' ', '.').replace('\t', '\\t'))
        self.nb_processed_lines += 1

        skip_current_line = self.skip_next_line
        self.skip_next_line = False
        if line[-1:] == '\\':
            deepdbg('analyse_line: Ignoring next line!')
            # skip lines after lines ending in \
            self.skip_next_line = True

        if skip_current_line:
            deepdbg('analyse_line: Ignoring current line!')
            return

        ret = self.analyse_line_indentation(line)
        if ret:
            self.nb_indent_hint += 1
        deepdbg('analyse_line: Result of line analysis: %s' % str(ret))
        return ret

    def analyse_line_type(self, line):
        """Classify *line* by the shape of its leading whitespace.

        Returns (LineType.X, indent_part) for NoIndent, TabOnly, BeginSpace
        and SpaceOnly lines, (LineType.Mixed, tab_part, space_part) for mixed
        lines, and None for lines that are rejected: blank lines, comment
        lines and improperly formatted indentation (spaces before tabs, or
        tabs followed by 8 spaces or more).
        """
        if len(line) > 0 and line[0] != ' ' and line[0] != '\t':
            return (LineType.NoIndent, '')

        mo = self.indent_re.match(line)
        if not mo:
            # Empty line or line made of blanks only.
            deepdbg('analyse_line_type: line is not indented')
            return None

        indent_part = mo.group(1)
        text_part = mo.group(2)

        deepdbg('analyse_line_type: indent_part="%s" text_part="%s"' %
                (indent_part.replace(' ', '.').replace('\t', '\\t').replace('\n', '\\n'),
                 text_part))

        # '*' : continuation of a C/C++ comment, unlikely to be indented
        # correctly; '/*' and '#' : C/C++ or python comment, might not be
        # indented correctly either.
        if text_part.startswith(('*', '/*', '#')):
            return None

        if '\t' in indent_part and ' ' in indent_part:
            # mixed mode
            mo = self.mixed_re.match(indent_part)
            if not mo:
                # line is not composed of '\t\t\t    ', ignore it
                return None
            tab_part, space_part = mo.group(1), mo.group(2)
            if len(space_part) >= 8:
                # this is not mixed mode, this is garbage !
                return None
            return (LineType.Mixed, tab_part, space_part)

        # From here indent_part is non-empty and made of a single kind of
        # blank character (guaranteed by indent_re and the test above).
        if '\t' in indent_part:
            return (LineType.TabOnly, indent_part)
        if len(indent_part) < 8:
            # this could be mixed mode too
            return (LineType.BeginSpace, indent_part)
        # this is really a line indented with spaces
        return (LineType.SpaceOnly, indent_part)

    def _record(self, kind, nb_space):
        """Count one hint for '<kind><nb_space>' when the step is within 2..8.

        Returns the counter key, or None when the step is out of range.
        """
        if not 1 < nb_space <= 8:
            return None
        key = '%s%d' % (kind, nb_space)
        self.lines[key] += 1
        return key

    def analyse_line_indentation(self, line):
        """Compare *line* with the previous significant line and record a hint.

        Returns the counter key that was incremented ('tab', 'spaceN' or
        'mixedN'; 'spaceN' when both space and mixed were incremented), or
        None when the pair of lines gives no usable information.
        """
        previous_line_info = self.previous_line_info
        current_line_info = self.analyse_line_type(line)
        self.previous_line_info = current_line_info

        if current_line_info is None or previous_line_info is None:
            deepdbg('analyse_line_indentation: Not enough line info to analyse line: %s, %s'
                    % (str(previous_line_info), str(current_line_info)))
            return

        t = (previous_line_info[0], current_line_info[0])
        deepdbg('analyse_line_indentation: Indent analysis: %s %s' % t)

        previous_indent = previous_line_info[1]
        current_indent = current_line_info[1]

        if t in ((LineType.TabOnly, LineType.TabOnly),
                 (LineType.NoIndent, LineType.TabOnly)):
            # Exactly one more tab than the previous line.
            if len(current_indent) - len(previous_indent) == 1:
                self.lines['tab'] += 1
                return 'tab'

        elif t in ((LineType.SpaceOnly, LineType.SpaceOnly),
                   (LineType.BeginSpace, LineType.SpaceOnly),
                   (LineType.NoIndent, LineType.SpaceOnly)):
            return self._record('space', len(current_indent) - len(previous_indent))

        elif t in ((LineType.BeginSpace, LineType.BeginSpace),
                   (LineType.NoIndent, LineType.BeginSpace)):
            # Fewer than 8 spaces fits both the space and the mixed style.
            nb_space = len(current_indent) - len(previous_indent)
            self._record('mixed', nb_space)
            return self._record('space', nb_space)

        elif t == (LineType.BeginSpace, LineType.TabOnly):
            # we assume that mixed indentation used 8 characters tabs
            if len(current_indent) == 1:
                # more than one tab on the line --> not mixed mode !
                return self._record('mixed', len(current_indent) * 8 - len(previous_indent))

        elif t == (LineType.TabOnly, LineType.Mixed):
            tab_part, space_part = current_line_info[1:3]
            if len(previous_indent) == len(tab_part):
                return self._record('mixed', len(space_part))

        elif t == (LineType.Mixed, LineType.TabOnly):
            tab_part, space_part = previous_line_info[1:3]
            if len(tab_part) + 1 == len(current_indent):
                return self._record('mixed', 8 - len(space_part))

        return None

    def _dominant_width(self, kind):
        """Return the most likely indent width for *kind*, or None if no hint.

        Widths are scanned from 8 down to 2; a smaller width replaces the
        current candidate only when its count beats it by the 10% threshold.
        """
        nb = 0
        indent_value = None
        for i in range(8, 1, -1):
            count = self.lines['%s%d' % (kind, i)]
            if count > int(nb * 1.1):  # give a 10% threshold
                indent_value = i
                nb = count
        return indent_value

    def results(self):
        """Decide the indentation from the collected hints.

        Returns ('space', N), ('tab', DEFAULT_TAB_WIDTH), ('mixed', (8, N))
        or self.default_result when no decision can be made.
        """
        dbg("Nb of scanned lines : %d" % self.nb_processed_lines)
        dbg("Nb of indent hint : %d" % self.nb_indent_hint)
        dbg("Collected data:")
        for key in self.lines:
            if self.lines[key] > 0:
                dbg('%s: %d' % (key, self.lines[key]))

        max_line_space = max(self.lines['space%d' % i] for i in range(2, 9))
        max_line_mixed = max(self.lines['mixed%d' % i] for i in range(2, 9))
        max_line_tab = self.lines['tab']

        dbg('max_line_space: %d' % max_line_space)
        dbg('max_line_mixed: %d' % max_line_mixed)
        dbg('max_line_tab: %d' % max_line_tab)

        ### Result analysis
        #
        # 1. Space indented file
        #    - lines indented with less than 8 space will fill mixed and space array
        #    - lines indented with 8 space or more will fill only the space array
        #    - almost no lines indented with tab
        #
        # => more lines with space than lines with mixed
        # => a lot more lines with space than tab
        #
        # 2. Tab indented file
        #    - most lines will be tab only
        #    - very few lines as mixed
        #    - very few lines as space only
        #
        # => a lot more lines with tab than lines with mixed
        # => a lot more lines with tab than lines with space
        #
        # 3. Mixed tab/space indented file
        #    - some lines are tab-only (lines with exactly 8 step indentation)
        #    - some lines are space only (less than 8 space)
        #    - all other lines are mixed
        #
        # If mixed is tab + 2 space indentation:
        #    - a lot more lines with mixed than with tab
        # If mixed is tab + 4 space indentation
        #    - as many lines with mixed than with tab
        #
        # If no lines exceed 8 space, there will be only lines with space
        # and tab but no lines with mixed. Impossible to detect mixed indentation
        # in this case, the file looks like it's actually indented as space only
        # and will be detected so.
        #
        # => same or more lines with mixed than lines with tab only
        # => same or more lines with mixed than lines with space only
        #

        result = None

        # Detect space indented file
        if max_line_space >= max_line_mixed and max_line_space > max_line_tab:
            indent_value = self._dominant_width('space')
            if indent_value is None:  # no lines
                result = self.default_result
            else:
                result = ('space', indent_value)

        # Detect tab files
        elif max_line_tab > max_line_mixed and max_line_tab > max_line_space:
            result = ('tab', DEFAULT_TAB_WIDTH)

        # Detect mixed files
        elif max_line_mixed >= max_line_tab and max_line_mixed > max_line_space:
            indent_value = self._dominant_width('mixed')
            if indent_value is None:  # no lines
                result = self.default_result
            else:
                result = ('mixed', (8, indent_value))

        else:
            # not enough information to make a decision
            result = self.default_result

        info("Result: %s" % str(result))
        return result

    def __str__(self):
        """Return the result as 'space N', 'tab N' or 'mixed tab N space M'."""
        itype, ival = self.results()
        if itype != 'mixed':
            return '%s %d' % (itype, ival)
        itab, ispace = ival
        return '%s tab %d space %d' % (itype, itab, ispace)

    def vim_output(self):
        """Return the result as a vim command line setting the indentation.

        Returns None when the result type is not 'space', 'tab' or 'mixed'.
        """
        indent_type, n = self.results()
        if indent_type == "space":
            # spaces:
            #   => set sts to the number of spaces
            #   => set tabstop to the number of spaces
            #   => expand tabs to spaces
            #   => set shiftwidth to the number of spaces
            return "set sts=%d | set tabstop=%d | set expandtab | set shiftwidth=%d \" (%s %d)" % (n, n, n, indent_type, n)

        elif indent_type == "tab":
            # tab:
            #   => set sts to 0
            #   => set tabstop to preferred value
            #   => set expandtab to false
            #   => set shiftwidth to tabstop
            return "set sts=0 | set tabstop=%d | set noexpandtab | set shiftwidth=%d \" (%s)" % (DEFAULT_TAB_WIDTH, DEFAULT_TAB_WIDTH, indent_type)

        elif indent_type == 'mixed':
            tab_indent, space_indent = n
            # mixed:
            #   => sts is hard-coded to 4
            #   => set tabstop to tab_indent
            #   => set expandtab to false
            #   => set shiftwidth to space_indent
            # NOTE(review): the original comment said "set sts to 0" while the
            # emitted command uses sts=4; the command is kept unchanged, please
            # confirm which one is intended.
            return "set sts=4 | set tabstop=%d | set noexpandtab | set shiftwidth=%d \" (%s %d)" % (tab_indent, space_indent, indent_type, space_indent)

        # Unknown indentation type (e.g. a custom default_result).
        return None
def main():
    """Command-line entry point: report the indentation of the given files.

    With several files, one "<file> : <result>" line is printed per file.
    With a single file only the result is printed; in --vim-output mode it
    is written without a trailing newline.
    """
    # The text is a description, not the program name (the first positional
    # parameter of ArgumentParser is ``prog``).
    parser = argparse.ArgumentParser(description='File indent finder')
    parser.add_argument('files', metavar='FILE', nargs='+', help='file to scan')
    parser.add_argument('--vim-output', dest='vim', default=None,
                        help='output suitable to use inside vim',
                        action="store_true")
    parser.add_argument('--default', dest='default', default='space',
                        choices=('space', 'tab'),
                        help='default type of indentation for files where indentation is not meaningful: could be space or tab')
    # type=int: the size is later formatted with %d.
    parser.add_argument('--default-size', dest='default_size', default=8,
                        type=int,
                        help='default values for files where indentation is not meaningful')
    # action='version' lets --version work without the mandatory FILE.
    parser.add_argument('-v', '--version', action='version',
                        version='IndentFinder v%s' % VERSION,
                        help='print version')
    args = parser.parse_args()

    file_list = args.files
    # Hand the command-line defaults to the finder so they take effect.
    fi = IndentFinder((args.default, args.default_size))

    if len(file_list) > 1:
        # multiple files
        for fname in file_list:
            fi.clear()
            fi.parse_file(fname)
            if args.vim:
                print("%s : %s" % (fname, fi.vim_output()))
            else:
                print("%s : %s" % (fname, str(fi)))
        return

    # only one file, don't print filename
    fi.parse_file_list(file_list)
    if args.vim:
        sys.stdout.write(fi.vim_output())
    else:
        print(str(fi))


if __name__ == "__main__":
    main()