-
Notifications
You must be signed in to change notification settings - Fork 0
/
language.rb
121 lines (81 loc) · 2.35 KB
/
language.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# coding: utf-8
#--------------------------------------------------------------------------
# language.rb
# Blogdemos/Language Confluxer
#
# Created by Egor Chiglintsev on December 14, 2013.
# Copyright (c) 2013 Egor Chiglintsev. All rights reserved.
#--------------------------------------------------------------------------
require "unicode" # Need unicode gem for UTF-8 downcase
require "set" # Set class
module Language
  # Generates random strings that imitate the letter patterns of sample text.
  #
  # Every input line is stripped, lowercased and scanned with a sliding
  # window: each two-character pair is recorded together with the character
  # that follows it. The first pair of every line is also remembered as a
  # possible starting point for generated strings.
  class Confluxer
    # Builds the model from zero or more text files.
    #
    # @param filenames [Array<String>] paths of files to learn from; each
    #   line of each file is processed independently
    def initialize(*filenames)
      @mapping = {}              # two-character pair => Array of characters seen after it
      @starting_pairs = Set.new  # pairs found at the very beginning of a line

      filenames.each { |filename| process_file(filename) }
    end

    # Produces a random string of at most +length+ characters.
    #
    # The result may be shorter than requested when generation reaches a
    # pair for which no following character was ever recorded.
    #
    # @param length [Numeric] requested number of characters
    # @return [String, nil] the generated string; nil when +length+ is not
    #   greater than 2 or when no starting pairs have been collected
    def next(length)
      return unless length > 2

      pair = @starting_pairs.to_a.sample
      # Empty model: bail out explicitly instead of relying on nil.dup,
      # which raises TypeError on Rubies older than 2.4.
      return unless pair

      string = pair.dup # copy, so the pair stored in the set is never mutated
      remaining = length - 2 # the starting pair already supplies two characters

      while remaining > 0
        letters = @mapping[pair]
        break unless letters # dead end: nothing ever followed this pair

        string << letters.sample
        pair = string[-2, 2]
        remaining -= 1
      end

      string
    end

    private

    # Feeds every line of the given file into the model.
    def process_file(filename)
      File.foreach(filename) { |line| process_line(line) }
    end

    # Normalizes one line (UTF-8, stripped, lowercased) and records all of
    # its pair => next-character transitions.
    def process_line(line)
      line.force_encoding(Encoding::UTF_8)
      normalized = Unicode::downcase(line.strip)

      normalized.each_confluxer_group(2, 1) do |pair, char, index|
        add_to_mapping(pair, char)
        @starting_pairs << pair if index.zero?
      end
    end

    # Appends +value+ to the list of characters known to follow +prefix+.
    # Empty values are ignored. Duplicates are kept, so a character recorded
    # several times is proportionally more likely to be picked.
    def add_to_mapping(prefix, value)
      return if value.empty?

      (@mapping[prefix] ||= []) << value
    end
  end # Confluxer
end # Language
# Core extension used by Language::Confluxer to slide a fixed-size window
# over a string and split each window into consecutive pieces.
class String
  # Walks a window across the string one character at a time and splits each
  # window into consecutive pieces of the given lengths.
  #
  # Example: "abcd".each_confluxer_group(2, 1) => [["ab", "c"], ["bc", "d"]]
  #
  # @param lengths [Array<Integer>] size of every piece inside one window
  # @yield [*pieces, index] the pieces of a window followed by the window's
  #   zero-based position
  # @return [Array<Array<String>>, nil] all windows (with or without a
  #   block); nil when the string is shorter than the combined lengths
  def each_confluxer_group(*lengths)
    window = lengths.inject(:+)
    return if length < window

    groups = (0..length - window).map { |offset| get_confluxer_group(offset, *lengths) }
    return groups unless block_given?

    groups.each_with_index { |parts, position| yield(*parts, position) }
  end

  # Cuts consecutive substrings out of the string, beginning at +start+.
  #
  # @param start [Integer] index of the first character of the first piece
  # @param lengths [Array<Integer>] length of each successive piece
  # @return [Array] one entry per requested length, in order
  def get_confluxer_group(start, *lengths)
    cursor = start
    lengths.map do |len|
      piece = self[cursor, len]
      cursor += len # the next piece begins right after this one
      piece
    end
  end
end