Permalink
Browse files

initial import

  • Loading branch information...
0 parents commit 65a3da4c5c2c32cfbb6f39e08b5571de0488281f @jdf jdf committed Dec 2, 2009
Showing with 10,243 additions and 0 deletions.
  1. +6 −0 .classpath
  2. +17 −0 .project
  3. +268 −0 .settings/org.eclipse.jdt.core.prefs
  4. +109 −0 .settings/org.eclipse.jdt.ui.prefs
  5. +135 −0 src/cue/lang/Counter.java
  6. +11 −0 src/cue/lang/IterableText.java
  7. +152 −0 src/cue/lang/NGramIterator.java
  8. +76 −0 src/cue/lang/SentenceIterator.java
  9. +63 −0 src/cue/lang/WordIterator.java
  10. +145 −0 src/cue/lang/stop/StopWords.java
  11. +351 −0 src/cue/lang/stop/arabic
  12. +219 −0 src/cue/lang/stop/catalan
  13. +2,024 −0 src/cue/lang/stop/croatian
  14. +256 −0 src/cue/lang/stop/czech
  15. +94 −0 src/cue/lang/stop/danish
  16. +107 −0 src/cue/lang/stop/dutch
  17. +182 −0 src/cue/lang/stop/english
  18. +180 −0 src/cue/lang/stop/esperanto
  19. +966 −0 src/cue/lang/stop/farsi
  20. +235 −0 src/cue/lang/stop/finnish
  21. +157 −0 src/cue/lang/stop/french
  22. +232 −0 src/cue/lang/stop/german
  23. +637 −0 src/cue/lang/stop/greek
  24. +220 −0 src/cue/lang/stop/hebrew
  25. +97 −0 src/cue/lang/stop/hindi
  26. +202 −0 src/cue/lang/stop/hungarian
  27. +279 −0 src/cue/lang/stop/italian
  28. +1 −0 src/cue/lang/stop/latin
  29. +176 −0 src/cue/lang/stop/norwegian
  30. +138 −0 src/cue/lang/stop/polish
  31. +204 −0 src/cue/lang/stop/portuguese
  32. +284 −0 src/cue/lang/stop/romanian
  33. +652 −0 src/cue/lang/stop/russian
  34. +110 −0 src/cue/lang/stop/slovak
  35. +448 −0 src/cue/lang/stop/slovenian
  36. +308 −0 src/cue/lang/stop/spanish
  37. +114 −0 src/cue/lang/stop/swedish
  38. +117 −0 src/cue/lang/stop/turkish
  39. +112 −0 src/cue/lang/unicode/BlockUtil.java
  40. +58 −0 src/cue/lang/unicode/Normalizer.java
  41. +67 −0 src/cue/lang/unicode/Normalizer5.java
  42. +34 −0 src/cue/lang/unicode/Normalizer6.java
  43. BIN src/cue/lang/unicode/normtable.bin
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="output" path="bin"/>
+</classpath>
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>cue.language</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
Oops, something went wrong.
@@ -0,0 +1,109 @@
+#Fri Nov 20 14:49:27 EST 2009
+cleanup.add_default_serial_version_id=true
+cleanup.add_generated_serial_version_id=false
+cleanup.add_missing_annotations=true
+cleanup.add_missing_deprecated_annotations=true
+cleanup.add_missing_methods=false
+cleanup.add_missing_nls_tags=false
+cleanup.add_missing_override_annotations=true
+cleanup.add_serial_version_id=false
+cleanup.always_use_blocks=true
+cleanup.always_use_parentheses_in_expressions=false
+cleanup.always_use_this_for_non_static_field_access=false
+cleanup.always_use_this_for_non_static_method_access=false
+cleanup.convert_to_enhanced_for_loop=true
+cleanup.correct_indentation=true
+cleanup.format_source_code=true
+cleanup.format_source_code_changes_only=false
+cleanup.make_local_variable_final=true
+cleanup.make_parameters_final=true
+cleanup.make_private_fields_final=true
+cleanup.make_type_abstract_if_missing_method=false
+cleanup.make_variable_declarations_final=true
+cleanup.never_use_blocks=false
+cleanup.never_use_parentheses_in_expressions=true
+cleanup.organize_imports=true
+cleanup.qualify_static_field_accesses_with_declaring_class=false
+cleanup.qualify_static_member_accesses_through_instances_with_declaring_class=true
+cleanup.qualify_static_member_accesses_through_subtypes_with_declaring_class=true
+cleanup.qualify_static_member_accesses_with_declaring_class=true
+cleanup.qualify_static_method_accesses_with_declaring_class=false
+cleanup.remove_private_constructors=true
+cleanup.remove_trailing_whitespaces=true
+cleanup.remove_trailing_whitespaces_all=true
+cleanup.remove_trailing_whitespaces_ignore_empty=false
+cleanup.remove_unnecessary_casts=true
+cleanup.remove_unnecessary_nls_tags=true
+cleanup.remove_unused_imports=true
+cleanup.remove_unused_local_variables=false
+cleanup.remove_unused_private_fields=true
+cleanup.remove_unused_private_members=false
+cleanup.remove_unused_private_methods=true
+cleanup.remove_unused_private_types=true
+cleanup.sort_members=false
+cleanup.sort_members_all=false
+cleanup.use_blocks=true
+cleanup.use_blocks_only_for_return_and_throw=false
+cleanup.use_parentheses_in_expressions=false
+cleanup.use_this_for_non_static_field_access=false
+cleanup.use_this_for_non_static_field_access_only_if_necessary=true
+cleanup.use_this_for_non_static_method_access=false
+cleanup.use_this_for_non_static_method_access_only_if_necessary=true
+cleanup_profile=_Bracing
+cleanup_settings_version=2
+eclipse.preferences.version=1
+editor_save_participant_org.eclipse.jdt.ui.postsavelistener.cleanup=true
+formatter_profile=_jdf
+formatter_settings_version=11
+org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates/>
+sp_cleanup.add_default_serial_version_id=true
+sp_cleanup.add_generated_serial_version_id=false
+sp_cleanup.add_missing_annotations=true
+sp_cleanup.add_missing_deprecated_annotations=true
+sp_cleanup.add_missing_methods=false
+sp_cleanup.add_missing_nls_tags=false
+sp_cleanup.add_missing_override_annotations=true
+sp_cleanup.add_serial_version_id=false
+sp_cleanup.always_use_blocks=true
+sp_cleanup.always_use_parentheses_in_expressions=false
+sp_cleanup.always_use_this_for_non_static_field_access=false
+sp_cleanup.always_use_this_for_non_static_method_access=false
+sp_cleanup.convert_to_enhanced_for_loop=true
+sp_cleanup.correct_indentation=false
+sp_cleanup.format_source_code=true
+sp_cleanup.format_source_code_changes_only=false
+sp_cleanup.make_local_variable_final=false
+sp_cleanup.make_parameters_final=true
+sp_cleanup.make_private_fields_final=false
+sp_cleanup.make_type_abstract_if_missing_method=false
+sp_cleanup.make_variable_declarations_final=true
+sp_cleanup.never_use_blocks=false
+sp_cleanup.never_use_parentheses_in_expressions=true
+sp_cleanup.on_save_use_additional_actions=true
+sp_cleanup.organize_imports=true
+sp_cleanup.qualify_static_field_accesses_with_declaring_class=false
+sp_cleanup.qualify_static_member_accesses_through_instances_with_declaring_class=true
+sp_cleanup.qualify_static_member_accesses_through_subtypes_with_declaring_class=true
+sp_cleanup.qualify_static_member_accesses_with_declaring_class=false
+sp_cleanup.qualify_static_method_accesses_with_declaring_class=false
+sp_cleanup.remove_private_constructors=true
+sp_cleanup.remove_trailing_whitespaces=false
+sp_cleanup.remove_trailing_whitespaces_all=true
+sp_cleanup.remove_trailing_whitespaces_ignore_empty=false
+sp_cleanup.remove_unnecessary_casts=true
+sp_cleanup.remove_unnecessary_nls_tags=false
+sp_cleanup.remove_unused_imports=false
+sp_cleanup.remove_unused_local_variables=false
+sp_cleanup.remove_unused_private_fields=true
+sp_cleanup.remove_unused_private_members=false
+sp_cleanup.remove_unused_private_methods=true
+sp_cleanup.remove_unused_private_types=true
+sp_cleanup.sort_members=false
+sp_cleanup.sort_members_all=false
+sp_cleanup.use_blocks=true
+sp_cleanup.use_blocks_only_for_return_and_throw=false
+sp_cleanup.use_parentheses_in_expressions=false
+sp_cleanup.use_this_for_non_static_field_access=false
+sp_cleanup.use_this_for_non_static_field_access_only_if_necessary=true
+sp_cleanup.use_this_for_non_static_method_access=false
+sp_cleanup.use_this_for_non_static_method_access_only_if_necessary=true
@@ -0,0 +1,135 @@
+/*
+ Copyright 2009 IBM Corp
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+package cue.lang;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
/**
 * Tallies how many times each distinct item has been noted, and keeps a
 * running total of all counts.
 * <p>
 * Counts are held in a private {@link HashMap}; the views handed out by
 * {@link #entrySet()} and {@link #keySet()} are read-only. No synchronization
 * is performed by this class.
 *
 * @param <T>
 *            the type of item being counted
 * @author Jonathan Feinberg <jdf@us.ibm.com>
 */
public class Counter<T>
{
    // delegate, don't extend, to prevent unauthorized monkeying with internals
    private final Map<T, Integer> items = new HashMap<T, Integer>();
    private int totalItemCount = 0;

    /**
     * Creates an empty counter.
     */
    public Counter()
    {
    }

    /**
     * Creates a counter and notes each element of <code>items</code> once.
     *
     * @param items
     *            the items to count
     */
    public Counter(final Iterable<T> items)
    {
        noteAll(items);
    }

    /**
     * Notes each element of <code>items</code> once.
     *
     * @param items
     *            the items to count
     */
    public void noteAll(final Iterable<T> items)
    {
        for (final T t : items)
        {
            note(t, 1);
        }
    }

    /**
     * Notes a single occurrence of <code>item</code>.
     *
     * @param item
     *            the item to count
     */
    public void note(final T item)
    {
        note(item, 1);
    }

    /**
     * Adds <code>count</code> to the tally for <code>item</code> and to the
     * running total.
     *
     * @param item
     *            the item to count
     * @param count
     *            the amount to add; the value is not range-checked
     */
    public void note(final T item, final int count)
    {
        final Integer existingCount = items.get(item);
        items.put(item, existingCount == null ? count : existingCount + count);
        totalItemCount += count;
    }

    /**
     * @return the sum of every count noted since construction or the last
     *         {@link #clear()}
     */
    public int getTotalItemCount()
    {
        return totalItemCount;
    }

    // Orders entries from highest count to lowest. Uses compareTo rather than
    // subtraction: subtracting two ints can overflow (e.g. a very large count
    // against a negative one) and report the wrong ordering.
    private final Comparator<Entry<T, Integer>> BY_FREQ_DESC = new Comparator<Entry<T, Integer>>() {
        public int compare(final Entry<T, Integer> o1, final Entry<T, Integer> o2)
        {
            return o2.getValue().compareTo(o1.getValue());
        }
    };

    /**
     * @param n
     *            the maximum number of items to return; must not be negative
     * @return An unmodifiable list of the min(n, size()) most frequent items,
     *         most frequent first
     * @throws IllegalArgumentException
     *             if <code>n</code> is negative
     */
    public List<T> getMostFrequent(final int n)
    {
        // Fail fast, before paying for the copy and the sort.
        if (n < 0)
        {
            throw new IllegalArgumentException("n must not be negative: " + n);
        }
        final List<Entry<T, Integer>> all = new ArrayList<Entry<T, Integer>>(items.entrySet());
        Collections.sort(all, BY_FREQ_DESC);
        final int resultSize = Math.min(n, all.size());
        final List<T> result = new ArrayList<T>(resultSize);
        for (final Entry<T, Integer> e : all.subList(0, resultSize))
        {
            result.add(e.getKey());
        }
        return Collections.unmodifiableList(result);
    }

    /**
     * @param item
     *            the item to look up
     * @return the tally for <code>item</code>, or 0 if it has never been
     *         noted
     */
    public Integer getCount(final T item)
    {
        final Integer freq = items.get(item);
        if (freq == null)
        {
            return 0;
        }
        return freq;
    }

    /**
     * Forgets every item and resets the running total to zero.
     */
    public void clear()
    {
        items.clear();
        // Keep the total consistent with the (now empty) map.
        totalItemCount = 0;
    }

    /**
     * @return a read-only view of the item/count pairs; neither the set nor
     *         its entries can be modified
     */
    public Set<Entry<T, Integer>> entrySet()
    {
        // Go through unmodifiableMap rather than wrapping the entry set
        // directly: only the map wrapper also blocks Entry.setValue, which
        // would otherwise let callers change counts behind totalItemCount.
        return Collections.unmodifiableMap(items).entrySet();
    }

    /**
     * @return a read-only view of the distinct items noted so far
     */
    public Set<T> keySet()
    {
        return Collections.unmodifiableSet(items.keySet());
    }

    @Override
    public String toString()
    {
        return items.toString();
    }
}
@@ -0,0 +1,11 @@
+package cue.lang;
+
+import java.util.Iterator;
+
/**
 * Base for text sources that act as both an {@link Iterator} over strings and
 * the {@link Iterable} that supplies it, so a subclass instance can be used
 * directly in an enhanced <code>for</code> loop.
 * <p>
 * Subclasses provide the {@link Iterator} methods themselves.
 */
abstract class IterableText implements Iterable<String>, Iterator<String>
{
    /**
     * @return this same object; every call hands back the one shared
     *         iterator, not a fresh one
     */
    public Iterator<String> iterator()
    {
        final Iterator<String> self = this;
        return self;
    }
}
Oops, something went wrong.

0 comments on commit 65a3da4

Please sign in to comment.