# xref: /illumos-gate/usr/src/tools/onbld/Checks/SpellCheck.py (revision 856f710c9dc323b39da5935194d7928ffb99b67f)
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
21
#
# Copyright 2016 Joyent, Inc.
# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
#
26
import re
import sys
28
#
# Message templates.  Both are %-formatted with the tuple
# (filename, line number, text to avoid, suggested replacement).
#
spellMsg = '%s: Line %d contains "%s", a common misspelling of "%s"\n'
altMsg = '%s: Line %d contains "%s"; please use "%s" instead for consistency with other documentation\n'

#
# Known misspellings, keyed by the bad spelling.  The value is the text
# shown as the suggested correction; where several corrections are
# plausible they are listed in one comma-separated string.  A key may
# contain a space (e.g. 'the the') to catch a doubled word on one line.
#
misspellings = {
	'absense': 'absence',
	'accessable': 'accessible',
	'accomodate': 'accommodate',
	'accomodation': 'accommodation',
	'accross': 'across',
	'acheive': 'achieve',
	'addional': 'additional',
	'addres': 'address',
	'admininistrative': 'administrative',
	'adminstered': 'administered',
	'adminstrate': 'administrate',
	'adminstration': 'administration',
	'adminstrative': 'administrative',
	'adminstrator': 'administrator',
	'admissability': 'admissibility',
	'adress': 'address',
	'adressable': 'addressable',
	'adressed': 'addressed',
	'adressing': 'addressing, dressing',
	'aginst': 'against',
	'agression': 'aggression',
	'agressive': 'aggressive',
	'alot': 'a lot, allot',
	'and and': 'and',
	'apparantly': 'apparently',
	'appearence': 'appearance',
	'arguement': 'argument',
	'assasination': 'assassination',
	'auxilliary': 'auxiliary',
	'basicly': 'basically',
	'begining': 'beginning',
	'belive': 'believe',
	'beteen': 'between',
	'betwen': 'between',
	'beween': 'between',
	'bewteen': 'between',
	'bizzare': 'bizarre',
	'buisness': 'business',
	'calender': 'calendar',
	'cemetary': 'cemetery',
	'chauffer': 'chauffeur',
	'collegue': 'colleague',
	'comming': 'coming',
	'commited': 'committed',
	'commitee': 'committee',
	'commiting': 'committing',
	'comparision': 'comparison',
	'comparisions': 'comparisons',
	'compatability': 'compatibility',
	'compatable': 'compatible',
	'compatablity': 'compatibility',
	'compatiable': 'compatible',
	'compatiblity': 'compatibility',
	'completly': 'completely',
	'concious': 'conscious',
	'condidtion': 'condition',
	'conected': 'connected',
	'conjuction': 'conjunction',
	'continous': 'continuous',
	'curiousity': 'curiosity',
	'deamon': 'daemon',
	'definately': 'definitely',
	'desireable': 'desirable',
	'diffrent': 'different',
	'dilemna': 'dilemma',
	'dissapear': 'disappear',
	'dissapoint': 'disappoint',
	'ecstacy': 'ecstasy',
	'embarass': 'embarrass',
	'enviroment': 'environment',
	'exept': 'except',
	'existance': 'existence',
	'familar': 'familiar',
	'finaly': 'finally',
	'folowing': 'following',
	'foriegn': 'foreign',
	'forseeable': 'foreseeable',
	'fourty': 'forty',
	'foward': 'forward',
	'freind': 'friend',
	'futher': 'further',
	'gaurd': 'guard',
	'glamourous': 'glamorous',
	'goverment': 'government',
	'happend': 'happened',
	'harrassment': 'harassment',
	'hierachical': 'hierarchical',
	'hierachies': 'hierarchies',
	'hierachy': 'hierarchy',
	'hierarcical': 'hierarchical',
	'hierarcy': 'hierarchy',
	'honourary': 'honorary',
	'humourous': 'humorous',
	'idiosyncracy': 'idiosyncrasy',
	'immediatly': 'immediately',
	'inaccessable': 'inaccessible',
	'inbetween': 'between',
	'incidently': 'incidentally',
	'independant': 'independent',
	'infomation': 'information',
	'interupt': 'interrupt',
	'intial': 'initial',
	'intially': 'initially',
	'irresistable': 'irresistible',
	'jist': 'gist',
	'knowlege': 'knowledge',
	'lenght': 'length',
	'liase': 'liaise',
	'liason': 'liaison',
	'libary': 'library',
	'maching': 'machine, marching, matching',
	'millenia': 'millennia',
	'millenium': 'millennium',
	'neccessary': 'necessary',
	'negotation': 'negotiation',
	'nontheless': 'nonetheless',
	'noticable': 'noticeable',
	'occassion': 'occasion',
	'occassional': 'occasional',
	'occassionally': 'occasionally',
	'occurance': 'occurrence',
	'occured': 'occurred',
	'occurence': 'occurrence',
	'occuring': 'occurring',
	'ommision': 'omission',
	'orginal': 'original',
	'orginally': 'originally',
	'ouput': 'output',
	'overriden': 'overridden',
	'particuliar': 'particular',
	'pavillion': 'pavilion',
	'peice': 'piece',
	'persistant': 'persistent',
	'politican': 'politician',
	'posession': 'possession',
	'possiblity': 'possibility',
	'preceed': 'precede',
	'preceeded': 'preceded',
	'preceeding': 'preceding',
	'preceeds': 'precedes',
	'prefered': 'preferred',
	'prefering': 'preferring',
	'presense': 'presence',
	'proces': 'process',
	'propoganda': 'propaganda',
	'psuedo': 'pseudo',
	'publically': 'publicly',
	'realy': 'really',
	'reciept': 'receipt',
	'recieve': 'receive',
	'recieved': 'received',
	'reciever': 'receiver',
	'recievers': 'receivers',
	'recieves': 'receives',
	'recieving': 'receiving',
	'recomend': 'recommend',
	'recomended': 'recommended',
	'recomending': 'recommending',
	'recomends': 'recommends',
	'recurse': 'recur',
	'recurses': 'recurs',
	'recursing': 'recurring',
	'refered': 'referred',
	'refering': 'referring',
	'religous': 'religious',
	'rember': 'remember',
	'remeber': 'remember',
	'repetion': 'repetition',
	'reponsible': 'responsible',
	'resistence': 'resistance',
	'retreive': 'retrieve',
	'seige': 'siege',
	'sence': 'since',
	'seperate': 'separate',
	'seperated': 'separated',
	'seperately': 'separately',
	'seperates': 'separates',
	'similiar': 'similar',
	'somwhere': 'somewhere',
	'sould': 'could, should, sold, soul',
	'sturcture': 'structure',
	'succesful': 'successful',
	'succesfully': 'successfully',
	'successfull': 'successful',
	'sucessful': 'successful',
	'supercede': 'supersede',
	'supress': 'suppress',
	'supressed': 'suppressed',
	'suprise': 'surprise',
	'suprisingly': 'surprisingly',
	'sytem': 'system',
	'tendancy': 'tendency',
	'the the': 'the',
	'the these': 'these',
	'therefor': 'therefore',
	'threshhold': 'threshold',
	'tolerence': 'tolerance',
	'tommorow': 'tomorrow',
	'tommorrow': 'tomorrow',
	'tounge': 'tongue',
	'tranformed': 'transformed',
	'transfered': 'transferred',
	'truely': 'truly',
	'trustworthyness': 'trustworthiness',
	'uncommited': 'uncommitted',
	'unforseen': 'unforeseen',
	'unfortunatly': 'unfortunately',
	'unsuccessfull': 'unsuccessful',
	'untill': 'until',
	'upto': 'up to',
	'whereever': 'wherever',
	'wich': 'which',
	'wierd': 'weird',
	'wtih': 'with',
}

#
# Spellings that are reported with altMsg rather than spellMsg: the key
# is the form to avoid, the value is the form to use instead.
#
alternates = {
	'parseable': 'parsable',
	'sub-command': 'subcommand',
	'sub-commands': 'subcommands',
	'writeable': 'writable',
}

def _compile_entries(table):
	"""Return one (regex, bad, good) tuple per item of table.

	Each regex matches the key case-insensitively and only between
	word boundaries.  The key is passed through re.escape() so that it
	is always matched literally, even if it contains a character that
	is special in a regular expression.
	"""
	return [
		(re.compile(r'\b%s\b' % re.escape(bad), re.IGNORECASE), bad, good)
		for bad, good in table.items()
	]

# Pre-compiled once at import time; entries follow the tables' own order.
misspellingREs = _compile_entries(misspellings)
alternateREs = _compile_entries(alternates)
268
def check(errmsg, output, filename, line, lineno, entry):
	"""Report a single known-bad spelling if it occurs in line.

	errmsg   -- %-format string taking (filename, lineno, bad, good)
	output   -- object with a write() method that receives the message
	filename -- name used in the message
	line     -- text to search
	lineno   -- line number used in the message
	entry    -- sequence whose item 0 is a compiled regex, item 1 the
	            spelling to avoid and item 2 the suggested replacement

	Returns 1 if the regex matched (and a message was written),
	otherwise 0.
	"""
	if entry[0].search(line) is None:
		return 0

	output.write(errmsg % (filename, lineno, entry[1], entry[2]))
	return 1
275
def spellcheck(fh, filename=None, output=sys.stderr, **opts):
	"""Scan every line of fh for known misspellings and alternates.

	fh       -- seekable file object; it is rewound to offset 0 first.
	            Lines may be bytes (binary handle) or str (text handle).
	filename -- name used in messages; defaults to fh.name when empty
	output   -- object with a write() method that receives the messages
	opts     -- accepted but not used by this function

	Each line is tested against every entry of misspellingREs
	(reported with spellMsg) and of alternateREs (reported with altMsg).

	Returns 1 if at least one message was written, otherwise 0.
	"""
	if not filename:
		filename = fh.name

	ret = 0
	fh.seek(0)
	for lineno, line in enumerate(fh, 1):
		# A binary handle yields bytes, which must be decoded before
		# the str regexes can be applied; undecodable bytes are
		# replaced rather than raising.  A text handle already
		# yields str, which has no decode() method.
		if isinstance(line, bytes):
			line = line.decode(errors='replace')
		for entry in misspellingREs:
			ret |= check(spellMsg, output, filename, line,
			    lineno, entry)
		for entry in alternateREs:
			ret |= check(altMsg, output, filename, line,
			    lineno, entry)

	return ret
295