Skip to content
This repository has been archived by the owner on Sep 18, 2021. It is now read-only.

Commit

Permalink
Add cashtag support.
Browse files Browse the repository at this point in the history
Use NSMatchingWithoutAnchoringBounds when matching with primary regexp.
  • Loading branch information
psychs committed May 19, 2012
1 parent 726557c commit e3c1425
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.rdoc
Expand Up @@ -30,7 +30,7 @@ https://github.com/twitter/twitter-text-objc/issues

== To Do

* Cashtags support ($AAPL)
* None

== Authors

Expand Down
1 change: 1 addition & 0 deletions lib/TwitterText.h
Expand Up @@ -18,6 +18,7 @@
+ (NSArray*)entitiesInText:(NSString*)text;
+ (NSArray*)URLsInText:(NSString*)text;
+ (NSArray*)hashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap;
+ (NSArray*)cashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap;
+ (NSArray*)mentionedScreenNamesInText:(NSString*)text;
+ (NSArray*)mentionsOrListsInText:(NSString*)text;
+ (TwitterTextEntity*)repliedScreenNameInText:(NSString*)text;
Expand Down
85 changes: 78 additions & 7 deletions lib/TwitterText.m
Expand Up @@ -138,7 +138,7 @@
@"]"

#define TWUHashtagBoundary \
@"\\A|\\z|[^&a-z0-9_" \
@"^|$|[^&a-z0-9_" \
TWULatinAccents \
TWUNonLatinHashtagChars \
TWUCJKHashtagCharacters \
Expand All @@ -149,6 +149,16 @@

#define TWUEndHashTagMatch @"\\A(?:[##]|://)"

//
// Cashtag
//
// A cashtag is a "$" followed by a short ticker-like symbol, e.g. $AAPL.
// The character classes below are written in lowercase only; the pattern is
// compiled with NSRegularExpressionCaseInsensitive at its point of use.
//

// Symbol body (without the leading "$"): 1-6 letters, optionally followed by
// a single "." or "_" and 1-2 more letters.
#define TWUCashtag @"[a-z]{1,6}(?:[._][a-z]{1,2})?"
// Full cashtag matcher:
//   - must be preceded by the "^" anchor or one character from TWUUnicodeSpaces
//     (the prefix is consumed by the match but not captured),
//   - capture group 1 is the "$" plus the symbol body,
//   - must be followed (lookahead only, nothing consumed) by the "$" anchor,
//     a whitespace character, or one character from TWUPunctuationChars.
#define TWUValidCashtag \
@"(?:^|[" TWUUnicodeSpaces @"])" \
@"(\\$" TWUCashtag @")" \
@"(?=$|\\s|[" TWUPunctuationChars @"])"

//
// Mention and list name
//
Expand Down Expand Up @@ -283,12 +293,14 @@
static NSRegularExpression *validTCOURLRegexp;
static NSRegularExpression *validHashtagRegexp;
static NSRegularExpression *endHashtagRegexp;
static NSRegularExpression *validCashtagRegexp;
static NSRegularExpression *validMentionOrListRegexp;
static NSRegularExpression *validReplyRegexp;
static NSRegularExpression *endMentionRegexp;

// Private class extension.
// These variants take an already-extracted list of URL entities so that a
// caller which has run URL extraction once can reuse the result for the
// overlap check instead of extracting URLs again for each tag type.
@interface TwitterText ()
// Hashtag extraction against a caller-supplied list of URL entities.
+ (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities;
// Cashtag extraction against a caller-supplied list of URL entities;
// passing nil means no URL overlap filtering is applied.
+ (NSArray*)cashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities;
@end

@implementation TwitterText
Expand All @@ -302,10 +314,14 @@ + (NSArray*)entitiesInText:(NSString*)text
NSMutableArray *results = [NSMutableArray array];

NSArray *urls = [self URLsInText:text];
NSArray *hashtags = [self hashtagsInText:text withURLEntities:urls];
[results addObjectsFromArray:urls];

NSArray *hashtags = [self hashtagsInText:text withURLEntities:urls];
[results addObjectsFromArray:hashtags];

NSArray *cashtags = [self cashtagsInText:text withURLEntities:urls];
[results addObjectsFromArray:cashtags];

NSArray *mentionsAndLists = [self mentionsOrListsInText:text];
NSMutableArray *addingItems = [NSMutableArray array];

Expand Down Expand Up @@ -361,7 +377,7 @@ + (NSArray*)URLsInText:(NSString*)text

while (1) {
position = NSMaxRange(allRange);
NSTextCheckingResult *urlResult = [validURLRegexp firstMatchInString:text options:0 range:NSMakeRange(position, len - position)];
NSTextCheckingResult *urlResult = [validURLRegexp firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!urlResult || urlResult.numberOfRanges < 9) {
break;
}
Expand Down Expand Up @@ -466,7 +482,7 @@ + (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
NSInteger position = 0;

while (1) {
NSTextCheckingResult *matchResult = [validHashtagRegexp firstMatchInString:text options:0 range:NSMakeRange(position, len - position)];
NSTextCheckingResult *matchResult = [validHashtagRegexp firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!matchResult || matchResult.numberOfRanges < 2) {
break;
}
Expand Down Expand Up @@ -503,6 +519,61 @@ + (NSArray*)hashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
return results;
}

+ (NSArray*)cashtagsInText:(NSString*)text checkingURLOverlap:(BOOL)checkingURLOverlap
{
    // Extracts cashtag entities from `text`.
    //
    // text               - the string to scan; nil or empty yields an empty array.
    // checkingURLOverlap - when YES, URLs are extracted from `text` first and any
    //                      cashtag whose range intersects a URL range is dropped;
    //                      when NO, no URL filtering is performed.
    //
    // Returns an array of TwitterTextEntity objects of type
    // TwitterTextEntityCashtag (possibly empty).
    if (text.length == 0) {
        return [NSArray array];
    }

    // A nil entity list makes the overlap loop in the worker a no-op.
    NSArray *urlEntities = checkingURLOverlap ? [self URLsInText:text] : nil;
    return [self cashtagsInText:text withURLEntities:urlEntities];
}

+ (NSArray*)cashtagsInText:(NSString*)text withURLEntities:(NSArray*)urlEntities
{
    // Scans `text` for cashtags and returns them as TwitterTextEntity objects.
    //
    // text        - the string to scan; nil or empty yields an empty array.
    // urlEntities - entities whose ranges must not intersect a cashtag; a
    //               cashtag that intersects any of them is skipped. May be nil.
    //
    // Each returned entity's range is capture group 1 of the match, i.e. it
    // starts at the "$" sign and excludes the preceding space.
    NSInteger textLength = text.length;
    if (textLength == 0) {
        return [NSArray array];
    }

    // Compile the pattern on first use and keep it in the file-level static.
    if (validCashtagRegexp == nil) {
        validCashtagRegexp = [[NSRegularExpression alloc] initWithPattern:TWUValidCashtag
                                                                  options:NSRegularExpressionCaseInsensitive
                                                                    error:NULL];
    }

    NSMutableArray *cashtagEntities = [NSMutableArray array];
    NSInteger searchStart = 0;

    for (;;) {
        NSRange searchRange = NSMakeRange(searchStart, textLength - searchStart);
        NSTextCheckingResult *match = [validCashtagRegexp firstMatchInString:text
                                                                     options:NSMatchingWithoutAnchoringBounds
                                                                       range:searchRange];
        if (match == nil || match.numberOfRanges < 2) {
            break;
        }

        NSRange tagRange = [match rangeAtIndex:1];

        // Reject the candidate if it intersects any known URL entity.
        BOOL overlapsURL = NO;
        for (TwitterTextEntity *urlEntity in urlEntities) {
            if (NSIntersectionRange(urlEntity.range, tagRange).length != 0) {
                overlapsURL = YES;
                break;
            }
        }

        if (!overlapsURL) {
            [cashtagEntities addObject:[TwitterTextEntity entityWithType:TwitterTextEntityCashtag
                                                                   range:tagRange]];
        }

        // Resume right after the whole match; the trailing delimiter was only
        // looked ahead at, so it is still available as the next match's prefix.
        searchStart = NSMaxRange(match.range);
    }

    return cashtagEntities;
}

+ (NSArray*)mentionedScreenNamesInText:(NSString*)text
{
if (!text.length) {
Expand Down Expand Up @@ -539,7 +610,7 @@ + (NSArray*)mentionsOrListsInText:(NSString*)text
NSInteger position = 0;

while (1) {
NSTextCheckingResult *matchResult = [validMentionOrListRegexp firstMatchInString:text options:0 range:NSMakeRange(position, len - position)];
NSTextCheckingResult *matchResult = [validMentionOrListRegexp firstMatchInString:text options:NSMatchingWithoutAnchoringBounds range:NSMakeRange(position, len - position)];
if (!matchResult || matchResult.numberOfRanges < 5) {
break;
}
Expand Down Expand Up @@ -586,7 +657,7 @@ + (TwitterTextEntity*)repliedScreenNameInText:(NSString*)text

NSInteger len = text.length;

NSTextCheckingResult *matchResult = [validReplyRegexp firstMatchInString:text options:0 range:NSMakeRange(0, len)];
NSTextCheckingResult *matchResult = [validReplyRegexp firstMatchInString:text options:(NSMatchingWithoutAnchoringBounds | NSMatchingAnchored) range:NSMakeRange(0, len)];
if (!matchResult || matchResult.numberOfRanges < 2) {
return nil;
}
Expand Down Expand Up @@ -657,7 +728,7 @@ + (int)remainingCharacterCount:(NSString*)text httpURLLength:(int)httpURLLength
TwitterTextEntity *entity = [urlEntities objectAtIndex:i];
NSRange urlRange = entity.range;
NSString *url = [string substringWithRange:urlRange];
if ([url rangeOfString:@"https" options:NSCaseInsensitiveSearch | NSAnchoredSearch].location == 0) {
if ([url rangeOfString:@"https" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)].location == 0) {
urlLengthOffset += httpsURLLength;
} else {
urlLengthOffset += httpURLLength;
Expand Down
1 change: 1 addition & 0 deletions lib/TwitterTextEntity.h
Expand Up @@ -17,6 +17,7 @@ typedef enum {
TwitterTextEntityScreenName,
TwitterTextEntityHashtag,
TwitterTextEntityListName,
TwitterTextEntityCashtag,
} TwitterTextEntityType;

@interface TwitterTextEntity : NSObject
Expand Down
3 changes: 3 additions & 0 deletions lib/TwitterTextEntity.m
Expand Up @@ -73,6 +73,9 @@ - (NSString*)description
case TwitterTextEntityListName:
typeString = @"ListName";
break;
case TwitterTextEntityCashtag:
typeString = @"Cashtag";
break;
}
return [NSString stringWithFormat:@"<%@: %@ %@>", NSStringFromClass([self class]), typeString, NSStringFromRange(range)];
}
Expand Down
63 changes: 63 additions & 0 deletions testproject/TwitterTextTests.m
Expand Up @@ -51,6 +51,8 @@ - (void)testExtract
NSArray *urlsWithIndices = [tests objectForKey:@"urls_with_indices"];
NSArray *hashtags = [tests objectForKey:@"hashtags"];
NSArray *hashtagsWithIndices = [tests objectForKey:@"hashtags_with_indices"];
NSArray *cashtags = [tests objectForKey:@"cashtags"];
NSArray *cashtagsWithIndices = [tests objectForKey:@"cashtags_with_indices"];

//
// Mentions
Expand Down Expand Up @@ -290,6 +292,67 @@ - (void)testExtract
STFail(@"Matching count is different: %lu != %lu\n%@", expected.count, results.count, testCase);
}
}

//
// Cashtag
//

for (NSDictionary *testCase in cashtags) {
NSString *text = [testCase objectForKey:@"text"];
NSArray *expected = [testCase objectForKey:@"expected"];

NSArray *results = [TwitterText cashtagsInText:text checkingURLOverlap:YES];
if (results.count == expected.count) {
int count = results.count;
for (int i=0; i<count; i++) {
NSString *expectedText = [expected objectAtIndex:i];

TwitterTextEntity *entity = [results objectAtIndex:i];
NSRange r = entity.range;
r.location++;
r.length--;
NSString *actualText = [text substringWithRange:r];

STAssertEqualObjects(expectedText, actualText, @"%@", testCase);
}
} else {
STFail(@"Matching count is different: %lu != %lu\n%@", expected.count, results.count, testCase);
NSLog(@"### %@", results);
}
}

//
// Cashtags with indices
//
for (NSDictionary *testCase in cashtagsWithIndices) {
NSString *text = [testCase objectForKey:@"text"];
NSArray *expected = [testCase objectForKey:@"expected"];

NSArray *results = [TwitterText cashtagsInText:text checkingURLOverlap:YES];
if (results.count == expected.count) {
int count = results.count;
for (int i=0; i<count; i++) {
NSDictionary *expectedDic = [expected objectAtIndex:i];
NSString *expectedCashtag = [expectedDic objectForKey:@"cashtag"];
NSArray *expectedIndices = [expectedDic objectForKey:@"indices"];
int expectedStart = [[expectedIndices objectAtIndex:0] intValue];
int expectedEnd = [[expectedIndices objectAtIndex:1] intValue];
NSRange expectedRange = NSMakeRange(expectedStart, expectedEnd - expectedStart);

TwitterTextEntity *entity = [results objectAtIndex:i];
NSRange actualRange = entity.range;
NSRange r = actualRange;
r.location++;
r.length--;
NSString *actualText = [text substringWithRange:r];

STAssertEqualObjects(expectedCashtag, actualText, @"%@", testCase);
STAssertTrue(NSEqualRanges(expectedRange, actualRange), @"%@ != %@\n%@", NSStringFromRange(expectedRange), NSStringFromRange(actualRange), testCase);
}
} else {
STFail(@"Matching count is different: %lu != %lu\n%@", expected.count, results.count, testCase);
}
}
}

@end

0 comments on commit e3c1425

Please sign in to comment.