Skip to content

Commit 49387be

Browse files
committed
Selector splitting: don't split selectors in the middle of whitelisted "multi-words".
Swift SVN r16016
1 parent f6a196f commit 49387be

File tree

7 files changed

+194
-10
lines changed

7 files changed

+194
-10
lines changed

include/swift/Basic/StringExtras.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
#define SWIFT_BASIC_STRINGEXTRAS_HPP
1919

2020
#include "swift/Basic/LLVM.h"
21+
#include "llvm/ADT/SmallVector.h"
22+
#include "llvm/ADT/StringMap.h"
2123
#include "llvm/ADT/StringRef.h"
2224
#include <iterator>
25+
#include <string>
2326

2427
namespace swift {
2528
/// Describes the kind of preposition a word is.
@@ -183,6 +186,10 @@ namespace swift {
183186
/// Retrieve the camelCase words in the given string.
184187
inline Words getWords(StringRef string) { return Words(string); }
185188

189+
/// Check whether the two words are the same, ignoring the case of the
190+
/// first letter.
191+
bool sameWordIgnoreFirstCase(StringRef word1, StringRef word2);
192+
186193
/// Lowercase the first word within the given camelCase string.
187194
///
188195
/// \param string The string to lowercase.
@@ -201,6 +208,46 @@ namespace swift {
201208
///
202209
/// \returns the string in sentence case.
203210
StringRef toSentencecase(StringRef string, SmallVectorImpl<char> &scratch);
211+
212+
/// Data structures that stores a set of multi-words, which are camelCase
213+
/// word sequences that should be kept together by an algorithm that splits
214+
/// up camelCase names.
215+
///
216+
/// This data structure stores the multiwords backwards. The key is the last
217+
/// word in the multi-word. The element is a vector of multi-words, where
218+
/// each element in the vector is a multi-word stored backwards with the
219+
/// last word removed.
220+
///
221+
/// FIXME: This is a crummy data structure. We should optimize this to avoid
222+
/// redundant storage and provide faster lookup.
223+
class MultiWordMap {
224+
llvm::StringMap<SmallVector<SmallVector<std::string, 1>, 1>> Data;
225+
226+
public:
227+
/// Whether the map is empty.
228+
bool empty() const { return Data.empty(); }
229+
230+
// Basis case: add an entry in multi-word map with \p last as the last word.
231+
SmallVectorImpl<std::string> &insert(StringRef last) {
232+
auto &words = Data[last];
233+
words.push_back({});
234+
return words.back();
235+
}
236+
237+
// Recursive case: add the current word to the end of the current
238+
// multiword.
239+
template<typename ...Args>
240+
SmallVectorImpl<std::string> &insert(StringRef current, Args... args) {
241+
auto &multiWord = insert(args...);
242+
multiWord.push_back(current.str());
243+
return multiWord;
244+
}
245+
246+
/// Match the longest (multi-)word possible, advancing the reverse
247+
/// iterator to point at the beginning of the longest (multi-)word match.
248+
Words::reverse_iterator match(Words::reverse_iterator first,
249+
Words::reverse_iterator last) const;
250+
};
204251
} // end namespace camel_case
205252
}
206253

lib/Basic/StringExtras.cpp

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,12 @@
1515
//
1616
//===----------------------------------------------------------------------===//
1717
#include "swift/Basic/StringExtras.h"
18+
#include "swift/Basic/Optional.h"
1819
#include "clang/Basic/CharInfo.h"
20+
#include "llvm/ADT/SmallString.h"
1921
#include "llvm/ADT/SmallVector.h"
22+
#include <algorithm>
23+
2024
using namespace swift;
2125
using namespace camel_case;
2226

@@ -101,7 +105,17 @@ StringRef camel_case::getLastWord(StringRef string) {
101105
return *--WordIterator(string, string.size());
102106
}
103107

104-
StringRef camel_case::toLowercaseWord(StringRef string,
108+
bool camel_case::sameWordIgnoreFirstCase(StringRef word1, StringRef word2) {
109+
if (word1.size() != word2.size())
110+
return false;
111+
112+
if (clang::toLowercase(word1[0]) != clang::toLowercase(word2[0]))
113+
return false;
114+
115+
return word1.substr(1) == word2.substr(1);
116+
}
117+
118+
StringRef camel_case::toLowercaseWord(StringRef string,
105119
SmallVectorImpl<char> &scratch) {
106120
if (string.empty())
107121
return string;
@@ -136,3 +150,41 @@ StringRef camel_case::toSentencecase(StringRef string,
136150
scratch.append(string.begin() + 1, string.end());
137151
return StringRef(scratch.data(), scratch.size());
138152
}
153+
154+
Words::reverse_iterator MultiWordMap::match(Words::reverse_iterator first,
155+
Words::reverse_iterator last) const{
156+
assert(first != last && "Empty sequence cannot be matched");
157+
158+
// If the current word isn't at the start of the sequence, we're done.
159+
llvm::SmallString<32> scratch;
160+
auto known = Data.find(toLowercaseWord(*first, scratch));
161+
if (known == Data.end())
162+
return first;
163+
164+
/// The best result (if we found anything) and the length of the match.
165+
Optional<std::pair<Words::reverse_iterator, unsigned>> bestResult;
166+
for (const auto &multiword : known->second) {
167+
// Match the words in the sequence to the words in this multiword.
168+
auto matchNext = first;
169+
++matchNext;
170+
171+
bool matched = true;
172+
for (const auto &word : multiword) {
173+
if (matchNext == last || !sameWordIgnoreFirstCase(word, *matchNext++)) {
174+
matched = false;
175+
break;
176+
}
177+
}
178+
179+
if (!matched)
180+
continue;
181+
182+
// If this is the first result, or the previous result was a shorter match,
183+
// we just found a better result.
184+
if (!bestResult || bestResult->second < multiword.size()) {
185+
bestResult = { std::prev(matchNext), multiword.size() };
186+
}
187+
}
188+
189+
return bestResult ? bestResult->first : first;
190+
}

lib/ClangImporter/ClangImporter.cpp

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ STATISTIC(NumPrepositionSplitMethodNames,
6262
STATISTIC(NumWordSplitMethodNames,
6363
"selectors where the first selector piece was split on the last "
6464
"word");
65+
STATISTIC(NumMultiWordSplitMethodNames,
66+
"selectors where the first slector piece was split on the last "
67+
"multiword");
6568
STATISTIC(NumPrepositionTrailingFirstPiece,
6669
"selectors where the first piece ends in a preposition");
6770
STATISTIC(NumMethodsMissingFirstArgName,
@@ -531,7 +534,6 @@ splitSelectorPieceAt(StringRef selector, unsigned index,
531534
camel_case::toLowercaseWord(selector.substr(index), buffer) };
532535
}
533536

534-
535537
/// Split the first selector piece into a function name and first
536538
/// parameter name, if possible.
537539
///
@@ -542,7 +544,9 @@ splitSelectorPieceAt(StringRef selector, unsigned index,
542544
/// the split. If no split is possible, the function name will be \c
543545
/// selector and the parameter name will be empty.
544546
static std::pair<StringRef, StringRef>
545-
splitFirstSelectorPiece(StringRef selector, SmallVectorImpl<char> &scratch) {
547+
splitFirstSelectorPiece(StringRef selector,
548+
const camel_case::MultiWordMap &multiWords,
549+
SmallVectorImpl<char> &scratch) {
546550
// Get the camelCase words in this selector.
547551
auto words = camel_case::getWords(selector);
548552
if (words.empty())
@@ -574,13 +578,21 @@ splitFirstSelectorPiece(StringRef selector, SmallVectorImpl<char> &scratch) {
574578

575579
// We did not find a preposition.
576580

577-
// If there is more than one word, split off the last word.
581+
// If there is more than one word, split off the last word, taking care
582+
// to avoid splitting in the middle of a multi-word.
578583
auto last = words.rbegin();
579584
if (std::next(last) != words.rend()) {
580-
++NumWordSplitMethodNames;
581-
return splitSelectorPieceAt(selector,
582-
std::prev(last.base()).getPosition(),
583-
scratch);
585+
auto splitPoint = multiWords.match(last, words.rend());
586+
auto beforeSplitPoint = std::next(splitPoint);
587+
if (beforeSplitPoint != words.rend()) {
588+
if (splitPoint == last)
589+
++NumWordSplitMethodNames;
590+
else
591+
++NumMultiWordSplitMethodNames;
592+
return splitSelectorPieceAt(selector,
593+
std::prev(splitPoint.base()).getPosition(),
594+
scratch);
595+
}
584596
}
585597

586598
// Nothing to split.
@@ -614,7 +626,8 @@ static Identifier importArgName(ASTContext &ctx, StringRef name) {
614626
/// Map an Objective-C selector name to a Swift method name.
615627
static DeclName mapSelectorName(ASTContext &ctx,
616628
clang::Selector selector,
617-
bool isInitializer) {
629+
bool isInitializer,
630+
const camel_case::MultiWordMap &multiWords) {
618631
assert(!selector.isNull() && "Null selector?");
619632

620633
// Zero-argument selectors.
@@ -649,6 +662,7 @@ static DeclName mapSelectorName(ASTContext &ctx,
649662
llvm::SmallString<16> scratch;
650663
StringRef funcName, firstArgName;
651664
std::tie(funcName, firstArgName) = splitFirstSelectorPiece(firstPiece,
665+
multiWords,
652666
scratch);
653667
baseName = ctx.getIdentifier(funcName);
654668
argumentNames.push_back(importArgName(ctx, firstArgName));
@@ -749,13 +763,23 @@ DeclName ClangImporter::Implementation::importName(clang::Selector selector,
749763
return known->second;
750764

751765
// Map the selector.
752-
auto result = mapSelectorName(SwiftContext, selector, isInitializer);
766+
populateMultiWords();
767+
auto result = mapSelectorName(SwiftContext, selector, isInitializer,
768+
MultiWords);
753769

754770
// Cache the result and return.
755771
SelectorMappings[{selector, isInitializer}] = result;
756772
return result;
757773
}
758774

775+
void ClangImporter::Implementation::populateMultiWords() {
776+
if (!MultiWords.empty())
777+
return;
778+
779+
#define MULTI_WORD(...) MultiWords.insert(__VA_ARGS__);
780+
#include "MultiWords.def"
781+
}
782+
759783
void ClangImporter::Implementation::populateKnownDesignatedInits() {
760784
if (!KnownDesignatedInits.empty())
761785
return;

lib/ClangImporter/ImporterImpl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "swift/ClangImporter/ClangImporter.h"
2121
#include "swift/AST/LazyResolver.h"
2222
#include "swift/AST/Type.h"
23+
#include "swift/Basic/StringExtras.h"
2324
#include "swift/Basic/Optional.h"
2425
#include "clang/Frontend/CompilerInstance.h"
2526
#include "clang/Frontend/FrontendActions.h"
@@ -230,6 +231,12 @@ class LLVM_LIBRARY_VISIBILITY ClangImporter::Implementation
230231
/// Mapping from Objective-C selectors to method names.
231232
llvm::DenseMap<std::pair<clang::Selector, char>, DeclName> SelectorMappings;
232233

234+
/// Multiwords used for selector splitting.
235+
camel_case::MultiWordMap MultiWords;
236+
237+
/// Populate the MultiWords map from \c MultiWords.def.
238+
void populateMultiWords();
239+
233240
/// Mapping that describes the designated initializers of
234241
/// Objective-C classes.
235242
///

lib/ClangImporter/MultiWords.def

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//===--- MultiWords.def - Multiple Word Database ----------------*- C++ -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See http://swift.org/LICENSE.txt for license information
9+
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
//
13+
// This file defines multiple-word sequences that should not be split up
14+
// as part of Objective-C selector splitting.
15+
// ===---------------------------------------------------------------------===//
16+
17+
/// MULTI_WORD(Words)
18+
/// Words: A comma-separated sequence of words that make up
19+
/// the multi-word.
20+
21+
#ifndef MULTI_WORD
22+
# error define MULTI_WORD before including MultiWords.def
23+
#endif
24+
25+
MULTI_WORD("attributed", "string")
26+
27+
#undef MULTI_WORD

test/Inputs/clang-importer-sdk/usr/include/Foundation.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,7 @@ typedef struct _NSRange {
216216
NSUInteger location;
217217
NSUInteger length;
218218
} NSRange;
219+
220+
@interface NSAttributedString : NSString
221+
- (NSAttributedString *)sliceAttributedString:(NSInteger)startIndex;
222+
@end

unittests/Basic/StringExtrasTest.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,26 @@ TEST(ToSentencecaseTest, Words) {
7373
EXPECT_EQ(camel_case::toSentencecase("URL", scratch), "URL");
7474
EXPECT_EQ(camel_case::toSentencecase("", scratch), "");
7575
}
76+
77+
TEST(MultiWordsTest, Match) {
78+
camel_case::MultiWordMap multiWords;
79+
multiWords.insert("attributed", "string");
80+
multiWords.insert("table", "view");
81+
multiWords.insert("table", "view", "controller");
82+
multiWords.insert("table", "top");
83+
84+
auto words1 = camel_case::getWords("createTableView");
85+
EXPECT_EQ(*multiWords.match(words1.rbegin(), words1.rend()), "Table");
86+
87+
auto words2 = camel_case::getWords("createTableViewController");
88+
EXPECT_EQ(*multiWords.match(words2.rbegin(), words2.rend()), "Table");
89+
90+
auto words3 = camel_case::getWords("createSubview");
91+
EXPECT_EQ(*multiWords.match(words3.rbegin(), words3.rend()), "Subview");
92+
93+
auto words4 = camel_case::getWords("addView");
94+
EXPECT_EQ(*multiWords.match(words4.rbegin(), words4.rend()), "View");
95+
96+
auto words5 = camel_case::getWords("insertAttributedString");
97+
EXPECT_EQ(*multiWords.match(words5.rbegin(), words5.rend()), "Attributed");
98+
}

0 commit comments

Comments
 (0)