Line data Source code
1 : /* Find near-matches for strings and identifiers. 2 : Copyright (C) 2015-2023 Free Software Foundation, Inc. 3 : 4 : This file is part of GCC. 5 : 6 : GCC is free software; you can redistribute it and/or modify it under 7 : the terms of the GNU General Public License as published by the Free 8 : Software Foundation; either version 3, or (at your option) any later 9 : version. 10 : 11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 : for more details. 15 : 16 : You should have received a copy of the GNU General Public License 17 : along with GCC; see the file COPYING3. If not see 18 : <http://www.gnu.org/licenses/>. */ 19 : 20 : #ifndef GCC_SPELLCHECK_H 21 : #define GCC_SPELLCHECK_H 22 : 23 : typedef unsigned int edit_distance_t; 24 : const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; 25 : 26 : /* spellcheck.cc */ 27 : extern edit_distance_t 28 : get_edit_distance (const char *s, int len_s, 29 : const char *t, int len_t); 30 : 31 : extern edit_distance_t 32 : get_edit_distance (const char *s, const char *t); 33 : 34 : extern const char * 35 : find_closest_string (const char *target, 36 : const auto_vec<const char *> *candidates); 37 : 38 : /* A traits class for describing a string-like type usable by 39 : class best_match. 40 : Specializations should provide the implementations of the following: 41 : 42 : static size_t get_length (TYPE); 43 : static const char *get_string (TYPE); 44 : 45 : get_string should return a non-NULL ptr, which does not need to be 46 : 0-terminated. */ 47 : 48 : template <typename TYPE> 49 : struct edit_distance_traits {}; 50 : 51 : /* Specialization of edit_distance_traits for C-style strings. */ 52 : 53 : template <> 54 : struct edit_distance_traits<const char *> 55 : { 56 409226 : static size_t get_length (const char *str) 57 : { 58 409226 : gcc_assert (str); 59 409226 : return strlen (str); 60 : } 61 : 62 180003 : static const char *get_string (const char *str) 63 : { 64 180003 : gcc_assert (str); 65 180003 : return str; 66 : } 67 : }; 68 : 69 : extern edit_distance_t get_edit_distance_cutoff (size_t goal_len, 70 : size_t candidate_len); 71 : 72 : /* A type for use when determining the best match against a string, 73 : expressed as a template so that we can match against various 74 : string-like types (const char *, frontend identifiers, and preprocessor 75 : macros). 76 : 77 : This type accumulates the best possible match against GOAL_TYPE for 78 : a sequence of elements of CANDIDATE_TYPE, whilst minimizing the 79 : number of calls to get_edit_distance and to 80 : edit_distance_traits<T>::get_length. */ 81 : 82 : template <typename GOAL_TYPE, typename CANDIDATE_TYPE> 83 : class best_match 84 : { 85 : public: 86 : typedef GOAL_TYPE goal_t; 87 : typedef CANDIDATE_TYPE candidate_t; 88 : typedef edit_distance_traits<goal_t> goal_traits; 89 : typedef edit_distance_traits<candidate_t> candidate_traits; 90 : 91 : /* Constructor. */ 92 : 93 2209 : best_match (GOAL_TYPE goal, 94 : edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) 95 2209 : : m_goal (goal_traits::get_string (goal)), 96 2209 : m_goal_len (goal_traits::get_length (goal)), 97 2209 : m_best_candidate (NULL), 98 2209 : m_best_distance (best_distance_so_far), 99 2209 : m_best_candidate_len (0) 100 2209 : {} 101 : 102 : /* Compare the edit distance between CANDIDATE and m_goal, 103 : and if it's the best so far, record it. */ 104 : 105 409226 : void consider (candidate_t candidate) 106 : { 107 409226 : size_t candidate_len = candidate_traits::get_length (candidate); 108 : 109 : /* Calculate a lower bound on the candidate's distance to the goal, 110 : based on the difference in lengths; it will require at least 111 : this many insertions/deletions. */ 112 409226 : edit_distance_t min_candidate_distance 113 409226 : = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len); 114 : 115 : /* If the candidate's length is sufficiently different to that 116 : of the goal string, then the number of insertions/deletions 117 : may be >= the best distance so far. If so, we can reject 118 : the candidate immediately without needing to compute 119 : the exact distance, since it won't be an improvement. */ 120 409226 : if (min_candidate_distance >= m_best_distance) 121 : return; 122 : 123 : /* If the candidate will be unable to beat the criterion in 124 : get_best_meaningful_candidate, reject it without computing 125 : the exact distance. */ 126 181790 : edit_distance_t cutoff = get_cutoff (candidate_len); 127 181790 : if (min_candidate_distance > cutoff) 128 : return; 129 : 130 : /* Otherwise, compute the distance and see if the candidate 131 : has beaten the previous best value. */ 132 180003 : const char *candidate_str = candidate_traits::get_string (candidate); 133 : edit_distance_t dist 134 180003 : = get_edit_distance (m_goal, m_goal_len, candidate_str, candidate_len); 135 : 136 180003 : bool is_better = false; 137 180003 : if (dist < m_best_distance) 138 : is_better = true; 139 174447 : else if (dist == m_best_distance) 140 : { 141 : /* Prefer a candidate that inserts a trailing '=', 142 : so that for 143 : "-ftrivial-auto-var-init" 144 : we suggest 145 : "-ftrivial-auto-var-init=" 146 : rather than 147 : "-Wtrivial-auto-var-init". */ 148 : /* Prefer a candidate has a difference in trailing sign character. */ 149 9527 : if (candidate_str[candidate_len - 1] == '=' 150 1 : && m_goal[m_goal_len - 1] != '=') 151 : is_better = true; 152 : } 153 : 154 : if (is_better) 155 : { 156 5557 : m_best_distance = dist; 157 5557 : m_best_candidate = candidate; 158 5557 : m_best_candidate_len = candidate_len; 159 : } 160 : } 161 : 162 : /* Assuming that BEST_CANDIDATE is known to be better than 163 : m_best_candidate, update (without recomputing the edit distance to 164 : the goal). */ 165 : 166 : void set_best_so_far (CANDIDATE_TYPE best_candidate, 167 : edit_distance_t best_distance, 168 : size_t best_candidate_len) 169 : { 170 : gcc_assert (best_distance < m_best_distance); 171 : m_best_candidate = best_candidate; 172 : m_best_distance = best_distance; 173 : m_best_candidate_len = best_candidate_len; 174 : } 175 : 176 : /* Generate the maximum edit distance for which we consider a suggestion 177 : to be meaningful, given a candidate of length CANDIDATE_LEN. */ 178 : 179 184085 : edit_distance_t get_cutoff (size_t candidate_len) const 180 : { 181 181790 : return ::get_edit_distance_cutoff (m_goal_len, candidate_len); 182 : } 183 : 184 : /* Get the best candidate so far, but applying a filter to ensure 185 : that we return NULL if none of the candidates are close to the goal, 186 : to avoid offering nonsensical suggestions to the user. */ 187 : 188 4177 : candidate_t get_best_meaningful_candidate () const 189 : { 190 : /* If the edit distance is too high, the suggestion is likely to be 191 : meaningless. */ 192 4177 : if (m_best_candidate) 193 : { 194 2295 : edit_distance_t cutoff = get_cutoff (m_best_candidate_len); 195 2295 : if (m_best_distance > cutoff) 196 : return NULL; 197 : } 198 : 199 : /* If the goal string somehow makes it into the candidate list, offering 200 : it as a suggestion will be nonsensical e.g. 201 : 'constexpr' does not name a type; did you mean 'constexpr'? 202 : Ultimately such suggestions are due to bugs in constructing the 203 : candidate list, but as a band-aid, do not offer suggestions for 204 : distance == 0 (where candidate == goal). */ 205 2351 : if (m_best_distance == 0) 206 : return NULL; 207 : 208 2228 : return m_best_candidate; 209 : } 210 : 211 : /* Get the closest candidate so far, without applying any filtering. */ 212 : 213 89 : candidate_t blithely_get_best_candidate () const 214 : { 215 89 : return m_best_candidate; 216 : } 217 : 218 4018 : edit_distance_t get_best_distance () const { return m_best_distance; } 219 : size_t get_best_candidate_length () const { return m_best_candidate_len; } 220 : 221 : private: 222 : const char *m_goal; 223 : size_t m_goal_len; 224 : candidate_t m_best_candidate; 225 : edit_distance_t m_best_distance; 226 : size_t m_best_candidate_len; 227 : }; 228 : 229 : #endif /* GCC_SPELLCHECK_H */