Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Initial commit

  • Loading branch information...
commit 938224307b1b8de404193f51b47f91118ee7c415 0 parents
Eelco Lempsink eelco authored
30 LICENSE
... ... @@ -0,0 +1,30 @@
  1 +Copyright (c) 2010, Tupil
  2 +
  3 +All rights reserved.
  4 +
  5 +Redistribution and use in source and binary forms, with or without
  6 +modification, are permitted provided that the following conditions are met:
  7 +
  8 + * Redistributions of source code must retain the above copyright
  9 + notice, this list of conditions and the following disclaimer.
  10 +
  11 + * Redistributions in binary form must reproduce the above
  12 + copyright notice, this list of conditions and the following
  13 + disclaimer in the documentation and/or other materials provided
  14 + with the distribution.
  15 +
  16 + * Neither the name of Tupil nor the names of other
  17 + contributors may be used to endorse or promote products derived
  18 + from this software without specific prior written permission.
  19 +
  20 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21 +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22 +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23 +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24 +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25 +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26 +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27 +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28 +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 NSString+Hyphenate.h
... ... @@ -0,0 +1,28 @@
  1 +//
  2 +// NSString+Hyphenate.h
  3 +//
  4 +// Created by Eelco Lempsink on 09-06-10.
  5 +// Copyright 2010 Tupil. All rights reserved.
  6 +//
  7 +
  8 +#import <Foundation/Foundation.h>
  9 +
  10 +// This will not work out of the box! You'll need some files from the
  11 +// hyphen library and dictionaries. See the README.txt for more information.
  12 +
  13 +@interface NSString (Hyphenate)
  14 +
  15 +// Returns the string with added soft-hyphens (UTF-8 char x00AD).
  16 +//
  17 +// The hyphenation library will be loaded using the locale identifiers name
  18 +// (with the format hyph_%@.dic) and the locale will also be used to tokenize
  19 +// the string into words.
  20 +//
  21 +// If you pass nil as the locale, this function tries to use
  22 +// CFStringTokenizerCopyBestStringLanguage to guess the language.
  23 +//
  24 +// The loaded dictionary will be cached, so for the best performance group your
  25 +// hyphenation tasks per locale.
  26 +- (NSString*)stringByHyphenatingWithLocale:(NSLocale*)locale;
  27 +
  28 +@end
157 NSString+Hyphenate.m
... ... @@ -0,0 +1,157 @@
  1 +//
  2 +// NSString+Hyphenate.m
  3 +//
  4 +// Created by Eelco Lempsink on 09-06-10.
  5 +// Copyright 2010 Tupil. All rights reserved.
  6 +//
  7 +
  8 +#import "NSString+Hyphenate.h"
  9 +
  10 +#include "hyphen.h"
  11 +
  12 +@implementation NSString (Hyphenate)
  13 +
  14 +- (NSString*)stringByHyphenatingWithLocale:(NSLocale*)locale {
  15 + static HyphenDict* dict = NULL;
  16 + static NSString* localeIdentifier = nil;
  17 + static NSBundle* bundle = nil;
  18 +
  19 + ////////////////////////////////////////////////////////////////////////////
  20 + // Setup.
  21 + //
  22 + // Establish that we got all the information we need: the bundle with
  23 + // dictionaries, the locale and the loaded dictionary. Cache dictionary and
  24 + // save the language code used to retrieve it.
  25 + //
  26 +
  27 + // Try to guess the locale from the string, if not given.
  28 + CFStringRef language;
  29 + if (locale == nil
  30 + && (language = CFStringTokenizerCopyBestStringLanguage(
  31 + (CFStringRef)self, CFRangeMake(0, [self length]))))
  32 + {
  33 + locale = [[[NSLocale alloc]
  34 + initWithLocaleIdentifier:(NSString*)language] autorelease];
  35 + CFRelease(language);
  36 + }
  37 +
  38 + if (locale == nil) {
  39 + return self;
  40 + } // else
  41 +
  42 + if (![localeIdentifier isEqualToString:[locale localeIdentifier]]
  43 + && dict != NULL)
  44 + {
  45 + hnj_hyphen_free(dict);
  46 + dict = NULL;
  47 + }
  48 +
  49 + localeIdentifier = [locale localeIdentifier];
  50 +
  51 + if (bundle == nil) {
  52 + NSString* bundlePath = [[[NSBundle mainBundle] resourcePath]
  53 + stringByAppendingPathComponent:
  54 + @"Hyphenate.bundle"];
  55 + bundle = [NSBundle bundleWithPath:bundlePath];
  56 + }
  57 +
  58 + if (dict == NULL) {
  59 + dict = hnj_hyphen_load([[bundle pathForResource:
  60 + [NSString stringWithFormat:@"hyph_%@",
  61 + localeIdentifier]
  62 + ofType:@"dic"]
  63 + UTF8String]);
  64 + }
  65 +
  66 + if (dict == NULL) {
  67 + return self;
  68 + } // else
  69 +
  70 + ////////////////////////////////////////////////////////////////////////////
  71 + // The works.
  72 + //
  73 + // No turning back now. We traverse the string using a tokenizer and pass
  74 + // every word we find into the hyphenation function. Non-used tokens and
  75 + // hyphenated words will be appended to the result string.
  76 + //
  77 +
  78 + NSMutableString* result = [NSMutableString stringWithCapacity:
  79 + [self length] * 1.2];
  80 +
  81 + // Varibles used for tokenizing
  82 + CFStringTokenizerRef tokenizer;
  83 + CFStringTokenizerTokenType tokenType;
  84 + CFRange tokenRange;
  85 + NSString* token;
  86 +
  87 + // Varibles used for hyphenation
  88 + char* hyphens;
  89 + char** rep;
  90 + int* pos;
  91 + int* cut;
  92 + int wordLength;
  93 + int i;
  94 +
  95 + tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault,
  96 + (CFStringRef)self,
  97 + CFRangeMake(0, [self length]),
  98 + kCFStringTokenizerUnitWordBoundary,
  99 + (CFLocaleRef)locale);
  100 +
  101 + while ((tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer))
  102 + != kCFStringTokenizerTokenNone)
  103 + {
  104 + tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
  105 + token = [self substringWithRange:
  106 + NSMakeRange(tokenRange.location, tokenRange.length)];
  107 +
  108 + if (tokenType & kCFStringTokenizerTokenHasNonLettersMask) {
  109 + [result appendString:token];
  110 + } else {
  111 + wordLength = tokenRange.length;
  112 + // This is the buffer size the algorithm needs.
  113 + hyphens = (char*)malloc(wordLength + 5); // +5, see hypen.h
  114 + rep = NULL; // Will be allocated by the algorithm
  115 + pos = NULL; // Idem
  116 + cut = NULL; // Idem
  117 +
  118 + // rep, pos and cut are not currently used, but the simpler
  119 + // hyphenation function is deprecated.
  120 + hnj_hyphen_hyphenate2(dict, [[token lowercaseString] UTF8String],
  121 + wordLength, hyphens, NULL, &rep, &pos, &cut);
  122 +
  123 + NSUInteger loc = 0;
  124 + NSUInteger len = 0;
  125 + for (i = 0; i < wordLength; i++) {
  126 + if (hyphens[i] & 1) {
  127 + len = i - loc + 1;
  128 + [result appendString:
  129 + [token substringWithRange:NSMakeRange(loc, len)]];
  130 + [result appendString:@"­"]; // NOTE: UTF-8 soft hyphen!
  131 + loc = loc + len;
  132 + }
  133 + }
  134 + if (loc < wordLength) {
  135 + [result appendString:
  136 + [token substringWithRange:NSMakeRange(loc, wordLength - loc)]];
  137 + }
  138 +
  139 + // Clean up
  140 + free(hyphens);
  141 + if (rep) {
  142 + for (i = 0; i < [self length]; i++) {
  143 + if (rep[i]) free(rep[i]);
  144 + }
  145 + free(rep);
  146 + free(pos);
  147 + free(cut);
  148 + }
  149 + }
  150 + }
  151 +
  152 + CFRelease(tokenizer);
  153 +
  154 + return result;
  155 +}
  156 +
  157 +@end
0  README.txt
No changes.

0 comments on commit 9382243

Please sign in to comment.
Something went wrong with that request. Please try again.