Browse files

Add cashtag support.

Use NSMatchingWithoutAnchoringBounds when matching with primary regexp.
  • Loading branch information...
1 parent 726557c commit e3c1425a15cced8d31261080810b0f3aaae29fa2 @psychs psychs committed May 15, 2012
Showing with 147 additions and 8 deletions.
  1. +1 −1 README.rdoc
  2. +1 −0 lib/TwitterText.h
  3. +78 −7 lib/TwitterText.m
  4. +1 −0 lib/TwitterTextEntity.h
  5. +3 −0 lib/TwitterTextEntity.m
  6. +63 −0 testproject/TwitterTextTests.m
View
2 README.rdoc
@@ -30,7 +30,7 @@ https://github.com/twitter/twitter-text-objc/issues
== To Do
-* Cashtags support ($AAPL)
+* None
== Authors
View
1 lib/TwitterText.h
@@ -18,6 +18,7 @@
+ (NSArray*)entitiesInText:(NSString*)text;
+ (NSArray*)URLsInText:(NSString*)text;
+ (NSArray*)hashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap;
++ (NSArray*)cashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap;
+ (NSArray*)mentionedScreenNamesInText:(NSString*)text;
+ (NSArray*)mentionsOrListsInText:(NSString*)text;
+ (TwitterTextEntity*)repliedScreenNameInText:(NSString*)text;
View
85 lib/TwitterText.m
@@ -138,7 +138,7 @@
@"]"
#define TWUHashtagBoundary \
-@"\\A|\\z|[^&a-z0-9_" \
+@"^|$|[^&a-z0-9_" \
TWULatinAccents \
TWUNonLatinHashtagChars \
TWUCJKHashtagCharacters \
@@ -150,6 +150,16 @@
#define TWUEndHashTagMatch @"\\A(?:[##]|://)"
//
+// Cashtag
+//
+// TWUCashtag: 1-6 letters, optionally followed by "." or "_" and 1-2 more letters; the pattern is compiled case-insensitively where it is used.
+#define TWUCashtag @"[a-z]{1,6}(?:[._][a-z]{1,2})?"
+#define TWUValidCashtag \
+ @"(?:^|[" TWUUnicodeSpaces @"])" \
+ @"(\\$" TWUCashtag @")" \
+ @"(?=$|\\s|[" TWUPunctuationChars @"])" // group 1 = "$" + symbol; the trailing delimiter is a lookahead, so it is not consumed
+
+//
// Mention and list name
//
@@ -283,12 +293,14 @@
static NSRegularExpression *validTCOURLRegexp;
static NSRegularExpression *validHashtagRegexp;
static NSRegularExpression *endHashtagRegexp;
+static NSRegularExpression *validCashtagRegexp;
static NSRegularExpression *validMentionOrListRegexp;
static NSRegularExpression *validReplyRegexp;
static NSRegularExpression *endMentionRegexp;
@interface TwitterText ()
+ (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities;
++ (NSArray*)cashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities;
@end
@implementation TwitterText
@@ -302,10 +314,14 @@ + (NSArray*)entitiesInText:(NSString*)text
NSMutableArray *results = [NSMutableArray array];
NSArray *urls = [self URLsInText:text];
- NSArray *hashtags = [self hashtagsInText:text withURLEntities:urls];
[results addObjectsFromArray:urls];
+
+ NSArray *hashtags = [self hashtagsInText:text withURLEntities:urls];
[results addObjectsFromArray:hashtags];
+ NSArray *cashtags = [self cashtagsInText:text withURLEntities:urls];
+ [results addObjectsFromArray:cashtags];
+
NSArray *mentionsAndLists = [self mentionsOrListsInText:text];
NSMutableArray *addingItems = [NSMutableArray array];
@@ -361,7 +377,7 @@ + (NSArray*)URLsInText:(NSString*)text
while (1) {
position = NSMaxRange(allRange);
- NSTextCheckingResult *urlResult = [validURLRegexp firstMatchInString:text options:0 range:NSMakeRange(position, len - position)];
+ NSTextCheckingResult *urlResult = [validURLRegexp firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!urlResult || urlResult.numberOfRanges < 9) {
break;
}
@@ -466,7 +482,7 @@ + (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
NSInteger position = 0;
while (1) {
- NSTextCheckingResult *matchResult = [validHashtagRegexp firstMatchInString:text options:0 range:NSMakeRange(position, len - position)];
+ NSTextCheckingResult *matchResult = [validHashtagRegexp firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!matchResult || matchResult.numberOfRanges < 2) {
break;
}
@@ -503,6 +519,61 @@ + (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
return results;
}
+// Public entry point for cashtag extraction.
+// Returns an array of TwitterTextEntity objects of type TwitterTextEntityCashtag, one per
+// cashtag found in `text`. When `checkingURLOverlap` is YES the URLs in `text` are extracted
+// first and any cashtag whose range intersects a URL is left out; when NO, no URL filtering
+// is performed. Empty (or nil) text yields an empty array.
++ (NSArray*)cashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
+{
+    if (text.length == 0) {
+        return [NSArray array];
+    }
+
+    // A nil URL list simply means the overlap loop in the worker method has nothing to check.
+    NSArray *urlEntities = checkingURLOverlap ? [self URLsInText:text] : nil;
+    return [self cashtagsInText:text withURLEntities:urlEntities];
+}
+
+// Worker for cashtag extraction.
+// Scans `text` with the TWUValidCashtag pattern and returns a TwitterTextEntity of type
+// TwitterTextEntityCashtag for every match whose range does not intersect the range of any
+// entity in `urlEntities` (pass nil to skip that filtering). The entity range is capture
+// group 1 of the pattern, i.e. it starts at the "$". Empty (or nil) text yields an empty array.
++ (NSArray*)cashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
+{
+    NSInteger textLength = text.length;
+    if (textLength == 0) {
+        return [NSArray array];
+    }
+
+    // Compile the pattern on first use and keep it in the file-level static for later calls.
+    if (validCashtagRegexp == nil) {
+        validCashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidCashtag options:NSRegularExpressionCaseInsensitive error:NULL];
+    }
+
+    NSMutableArray *cashtagEntities = [NSMutableArray array];
+    NSInteger searchStart = 0;
+
+    for (;;) {
+        NSRange searchRange = NSMakeRange(searchStart, textLength - searchStart);
+        NSTextCheckingResult *match = [validCashtagRegexp firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:searchRange];
+        if (match == nil || match.numberOfRanges < 2) {
+            break;
+        }
+
+        // Group 1 holds the cashtag itself, without the character that precedes it.
+        NSRange cashtagRange = [match rangeAtIndex:1];
+
+        // Drop cashtags that fall inside (or straddle) an already-extracted URL.
+        BOOL overlapsURL = NO;
+        for (TwitterTextEntity *urlEntity in urlEntities) {
+            if (NSIntersectionRange(urlEntity.range, cashtagRange).length != 0) {
+                overlapsURL = YES;
+                break;
+            }
+        }
+
+        if (!overlapsURL) {
+            [cashtagEntities addObject:[TwitterTextEntity entityWithType:TwitterTextEntityCashtag range:cashtagRange]];
+        }
+
+        // Continue right after the full match; the delimiter that follows the cashtag was
+        // only looked ahead at, so it is still available to introduce the next cashtag.
+        searchStart = NSMaxRange(match.range);
+    }
+
+    return cashtagEntities;
+}
+
+ (NSArray*)mentionedScreenNamesInText:(NSString*)text
{
if (!text.length) {
@@ -539,7 +610,7 @@ + (NSArray*)mentionsOrListsInText:(NSString*)text
NSInteger position = 0;
while (1) {
- NSTextCheckingResult *matchResult = [validMentionOrListRegexp firstMatchInString:text options:0 range:NSMakeRange(position, len - position)];
+ NSTextCheckingResult *matchResult = [validMentionOrListRegexp firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!matchResult || matchResult.numberOfRanges < 5) {
break;
}
@@ -586,7 +657,7 @@ + (TwitterTextEntity*)repliedScreenNameInText:(NSString*)text
NSInteger len = text.length;
- NSTextCheckingResult *matchResult = [validReplyRegexp firstMatchInString:text options:0 range:NSMakeRange(0, len)];
+ NSTextCheckingResult *matchResult = [validReplyRegexp firstMatchInString:text options:(NSMatchingWithoutAnchoringBounds | NSMatchingAnchored) range:NSMakeRange(0, len)];
if (!matchResult || matchResult.numberOfRanges < 2) {
return nil;
}
@@ -657,7 +728,7 @@ + (int)remainingCharacterCount:(NSString*)text httpURLLength:(int)httpURLLength
TwitterTextEntity *entity = [urlEntities objectAtIndex:i];
NSRange urlRange = entity.range;
NSString *url = [string substringWithRange:urlRange];
- if ([url rangeOfString:@"https" options:NSCaseInsensitiveSearch | NSAnchoredSearch].location == 0) {
+ if ([url rangeOfString:@"https" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)].location == 0) {
urlLengthOffset += httpsURLLength;
} else {
urlLengthOffset += httpURLLength;
View
1 lib/TwitterTextEntity.h
@@ -17,6 +17,7 @@ typedef enum {
TwitterTextEntityScreenName,
TwitterTextEntityHashtag,
TwitterTextEntityListName,
+ TwitterTextEntityCashtag,
} TwitterTextEntityType;
@interface TwitterTextEntity : NSObject
View
3 lib/TwitterTextEntity.m
@@ -73,6 +73,9 @@ - (NSString*)description
case TwitterTextEntityListName:
typeString = @"ListName";
break;
+ case TwitterTextEntityCashtag:
+ typeString = @"Cashtag";
+ break;
}
return [NSString stringWithFormat:@"<%@: %@ %@>", NSStringFromClass([self class]), typeString, NSStringFromRange(range)];
}
View
63 testproject/TwitterTextTests.m
@@ -51,6 +51,8 @@ - (void)testExtract
NSArray *urlsWithIndices = [tests objectForKey:@"urls_with_indices"];
NSArray *hashtags = [tests objectForKey:@"hashtags"];
NSArray *hashtagsWithIndices = [tests objectForKey:@"hashtags_with_indices"];
+ NSArray *cashtags = [tests objectForKey:@"cashtags"];
+ NSArray *cashtagsWithIndices = [tests objectForKey:@"cashtags_with_indices"];
//
// Mentions
@@ -290,6 +292,67 @@ - (void)testExtract
STFail(@"Matching count is different: %lu != %lu\n%@", expected.count, results.count, testCase);
}
}
+
+ //
+ // Cashtag
+ //
+
+ for (NSDictionary *testCase in cashtags) {
+ NSString *text = [testCase objectForKey:@"text"];
+ NSArray *expected = [testCase objectForKey:@"expected"];
+
+ NSArray *results = [TwitterText cashtagsInText:text checkingURLOverlap:YES];
+ if (results.count == expected.count) {
+ int count = results.count;
+ for (int i=0; i<count; i++) {
+ NSString *expectedText = [expected objectAtIndex:i];
+
+ TwitterTextEntity *entity = [results objectAtIndex:i];
+ NSRange r = entity.range;
+ r.location++;
+ r.length--;
+ NSString *actualText = [text substringWithRange:r];
+
+ STAssertEqualObjects(expectedText, actualText, @"%@", testCase);
+ }
+ } else {
+ STFail(@"Matching count is different: %lu != %lu\n%@", expected.count, results.count, testCase);
+ NSLog(@"### %@", results);
+ }
+ }
+
+ //
+ // Cashtags with indices
+ //
+ for (NSDictionary *testCase in cashtagsWithIndices) {
+ NSString *text = [testCase objectForKey:@"text"];
+ NSArray *expected = [testCase objectForKey:@"expected"];
+
+ NSArray *results = [TwitterText cashtagsInText:text checkingURLOverlap:YES];
+ if (results.count == expected.count) {
+ int count = results.count;
+ for (int i=0; i<count; i++) {
+ NSDictionary *expectedDic = [expected objectAtIndex:i];
+ NSString *expectedCashtag = [expectedDic objectForKey:@"cashtag"];
+ NSArray *expectedIndices = [expectedDic objectForKey:@"indices"];
+ int expectedStart = [[expectedIndices objectAtIndex:0] intValue];
+ int expectedEnd = [[expectedIndices objectAtIndex:1] intValue];
+ NSRange expectedRange = NSMakeRange(expectedStart, expectedEnd - expectedStart);
+
+ TwitterTextEntity *entity = [results objectAtIndex:i];
+ NSRange actualRange = entity.range;
+ NSRange r = actualRange;
+ r.location++;
+ r.length--;
+ NSString *actualText = [text substringWithRange:r];
+
+ STAssertEqualObjects(expectedCashtag, actualText, @"%@", testCase);
+ STAssertTrue(NSEqualRanges(expectedRange, actualRange), @"%@ != %@\n%@", NSStringFromRange(expectedRange), NSStringFromRange(actualRange), testCase);
+ }
+ } else {
+ STFail(@"Matching count is different: %lu != %lu\n%@", expected.count, results.count, testCase);
+ }
+ }
}
@end

0 comments on commit e3c1425

Please sign in to comment.