/
AbstractHadoopStoreBuilderMapper.java
119 lines (102 loc) · 4.8 KB
/
AbstractHadoopStoreBuilderMapper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*
* Copyright 2008-2009 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package voldemort.store.readonly.mr;
import java.io.IOException;
import java.security.MessageDigest;
import java.util.List;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import voldemort.cluster.Node;
import voldemort.routing.ConsistentRoutingStrategy;
import voldemort.serialization.DefaultSerializerFactory;
import voldemort.serialization.Serializer;
import voldemort.serialization.SerializerDefinition;
import voldemort.store.compress.CompressionStrategy;
import voldemort.store.compress.CompressionStrategyFactory;
import voldemort.utils.ByteUtils;
/**
 * Base mapper for Hadoop jobs that build Voldemort read-only stores.
 *
 * Subclasses implement {@link #makeKey(Object, Object)} and
 * {@link #makeValue(Object, Object)} to turn each input record into the
 * Voldemort key and value objects. This class then serializes both with the
 * serializers named in the store definition, compresses them when the
 * corresponding serializer definition asks for compression, and emits one
 * output record per node returned by the routing strategy.
 */
public abstract class AbstractHadoopStoreBuilderMapper<K, V> extends
        AbstractStoreBuilderConfigurable implements Mapper<K, V, BytesWritable, BytesWritable> {

    /** Number of leading bytes of every output value reserved for the node id. */
    private static final int NODE_ID_BYTES = 4;

    private MessageDigest md5er;
    private ConsistentRoutingStrategy routingStrategy;

    private SerializerDefinition keySerializerDefinition;
    private SerializerDefinition valueSerializerDefinition;

    private Serializer<Object> keySerializer;
    private Serializer<Object> valueSerializer;

    private CompressionStrategy keyCompressor;
    private CompressionStrategy valueCompressor;

    /**
     * Builds the Voldemort key object for one input record.
     *
     * @param key the input key handed to map()
     * @param value the input value handed to map()
     * @return the object that will be passed to the key serializer
     */
    public abstract Object makeKey(K key, V value);

    /**
     * Builds the Voldemort value object for one input record.
     *
     * @param key the input key handed to map()
     * @param value the input value handed to map()
     * @return the object that will be passed to the value serializer
     */
    public abstract Object makeValue(K key, V value);

    /**
     * Converts one input record into Voldemort key/value bytes and emits it
     * once for every node the routing strategy returns for that key.
     *
     * The output key is the md5 digest of the serialized (and, when
     * configured, compressed) key produced from makeKey(). The output value
     * is the id of the receiving node followed by the serialized (and, when
     * configured, compressed) value produced from makeValue().
     *
     * @param key the input key
     * @param value the input value
     * @param output collector receiving one record per routed node
     * @param reporter not used by this method
     * @throws IOException propagated from the collector (or any callee that
     *         declares it)
     */
    public void map(K key,
                    V value,
                    OutputCollector<BytesWritable, BytesWritable> output,
                    Reporter reporter) throws IOException {
        // Serialize both parts first, then apply compression, key before value.
        byte[] serializedKey = keySerializer.toBytes(makeKey(key, value));
        byte[] serializedValue = valueSerializer.toBytes(makeValue(key, value));

        byte[] storedKey = compressIfConfigured(serializedKey,
                                                keySerializerDefinition,
                                                keyCompressor);
        byte[] storedValue = compressIfConfigured(serializedValue,
                                                  valueSerializerDefinition,
                                                  valueCompressor);

        // Payload layout: [node id prefix][value bytes]. The prefix is filled
        // in per node inside the loop below.
        byte[] payload = new byte[NODE_ID_BYTES + storedValue.length];
        System.arraycopy(storedValue, 0, payload, NODE_ID_BYTES, storedValue.length);

        BytesWritable hashedKey = new BytesWritable(md5er.digest(storedKey));
        List<Node> targets = routingStrategy.routeRequest(storedKey);

        // The same payload buffer is reused for every node; only the leading
        // id bytes are rewritten before each collect.
        for(Node target: targets) {
            ByteUtils.writeInt(payload, target.getId(), 0);
            output.collect(hashedKey, new BytesWritable(payload));
        }

        md5er.reset();
    }

    /**
     * Sets up the digest, serializers, compression strategies and routing
     * strategy from the store definition and cluster made available by the
     * superclass configuration.
     *
     * @param conf the job configuration, forwarded to the superclass
     */
    @Override
    @SuppressWarnings("unchecked")
    public void configure(JobConf conf) {
        super.configure(conf);

        md5er = ByteUtils.getDigest("md5");

        keySerializerDefinition = getStoreDef().getKeySerializer();
        valueSerializerDefinition = getStoreDef().getValueSerializer();

        keySerializer = (Serializer<Object>) new DefaultSerializerFactory()
                .getSerializer(keySerializerDefinition);
        valueSerializer = (Serializer<Object>) new DefaultSerializerFactory()
                .getSerializer(valueSerializerDefinition);

        keyCompressor = new CompressionStrategyFactory()
                .get(keySerializerDefinition.getCompression());
        valueCompressor = new CompressionStrategyFactory()
                .get(valueSerializerDefinition.getCompression());

        routingStrategy = new ConsistentRoutingStrategy(getCluster().getNodes(),
                                                        getStoreDef().getReplicationFactor());
    }

    /**
     * Returns the deflated form of the given bytes when the serializer
     * definition has compression, otherwise returns the bytes unchanged.
     */
    private byte[] compressIfConfigured(byte[] bytes,
                                        SerializerDefinition definition,
                                        CompressionStrategy compressor) throws IOException {
        if(definition.hasCompression()) {
            return compressor.deflate(bytes);
        }
        return bytes;
    }
}