This repository has been archived by the owner on Jan 15, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #113 from vrushalic/etl_aggregate_2
- Loading branch information
Showing
26 changed files
with
1,744 additions
and
335 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
133 changes: 133 additions & 0 deletions
133
hraven-core/src/main/java/com/twitter/hraven/AggregationConstants.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* | ||
Copyright 2014 Twitter, Inc. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package com.twitter.hraven; | ||
|
||
import org.apache.hadoop.hbase.util.Bytes; | ||
|
||
/** | ||
* defines the aggregation related constants | ||
* | ||
*/ | ||
public class AggregationConstants { | ||
|
||
// name the table as job_history_agg_daily | ||
public static final String AGG_DAILY_TABLE = Constants.HISTORY_TABLE + "_agg_daily"; | ||
public static final byte[] AGG_DAILY_TABLE_BYTES = Bytes.toBytes(AGG_DAILY_TABLE); | ||
|
||
//name the table as job_history_agg_weekly | ||
public static final String AGG_WEEKLY_TABLE = Constants.HISTORY_TABLE + "_agg_weekly"; | ||
public static final byte[] AGG_WEEKLY_TABLE_BYTES = Bytes.toBytes(AGG_WEEKLY_TABLE); | ||
|
||
public static final String INFO_FAM = "i"; | ||
public static final byte[] INFO_FAM_BYTES = Bytes.toBytes(INFO_FAM); | ||
|
||
/** | ||
* The s column family has a TTL of 30 days | ||
* It's used as a scratch column family | ||
* It stores the run ids that are seen for that day | ||
* we assume that a flow will not run for more than 30 days, | ||
* hence it's fine to "expire" that data | ||
*/ | ||
public static final String SCRATCH_FAM = "s"; | ||
public static final byte[] SCRATCH_FAM_BYTES = Bytes.toBytes(SCRATCH_FAM); | ||
|
||
/** parameter that specifies whether or not to aggregate */ | ||
public static final String AGGREGATION_FLAG_NAME = "aggregate"; | ||
|
||
/** | ||
* name of the flag that determines whether or not re-aggregate | ||
* (overrides aggregation status in raw table for that job) | ||
*/ | ||
public static final String RE_AGGREGATION_FLAG_NAME = "reaggregate"; | ||
|
||
/** column name for app id in aggregation table */ | ||
public static final String APP_ID_COL = "app_id"; | ||
public static final byte[] APP_ID_COL_BYTES = Bytes.toBytes(APP_ID_COL.toLowerCase()); | ||
|
||
/** | ||
* the number of runs in an aggregation | ||
*/ | ||
public static final String NUMBER_RUNS = "number_runs"; | ||
|
||
/** raw bytes representation of the number of runs parameter */ | ||
public static final byte[] NUMBER_RUNS_BYTES = Bytes.toBytes(NUMBER_RUNS.toLowerCase()); | ||
|
||
/** | ||
* the user who ran this app | ||
*/ | ||
public static final String USER = "user"; | ||
public static final byte[] USER_BYTES = Bytes.toBytes(USER.toLowerCase()); | ||
|
||
/** | ||
* the number of jobs in an aggregation | ||
*/ | ||
public static final String TOTAL_JOBS = "total_jobs"; | ||
|
||
/** raw bytes representation of the number jobs parameter */ | ||
public static final byte[] TOTAL_JOBS_BYTES = Bytes.toBytes(TOTAL_JOBS.toLowerCase()); | ||
|
||
/** | ||
* use this config setting to define an hadoop-version-independent property for queuename | ||
*/ | ||
public static final String HRAVEN_QUEUE = "queue"; | ||
|
||
/** raw bytes representation of the queue parameter */ | ||
public static final byte[] HRAVEN_QUEUE_BYTES = Bytes.toBytes(HRAVEN_QUEUE.toLowerCase()); | ||
|
||
/** total maps and reduces */ | ||
public static final String TOTAL_MAPS = "total_maps"; | ||
public static final byte[] TOTAL_MAPS_BYTES = Bytes.toBytes(TOTAL_MAPS.toLowerCase()); | ||
public static final String TOTAL_REDUCES = "total_reduces"; | ||
public static final byte[] TOTAL_REDUCES_BYTES = Bytes.toBytes(TOTAL_REDUCES.toLowerCase()); | ||
|
||
/** slot millis for maps and reduces */ | ||
public static final String SLOTS_MILLIS_MAPS = "slots_millis_maps"; | ||
public static final byte[] SLOTS_MILLIS_MAPS_BYTES = Bytes.toBytes(SLOTS_MILLIS_MAPS | ||
.toLowerCase()); | ||
public static final String SLOTS_MILLIS_REDUCES = "slots_millis_reduces"; | ||
public static final byte[] SLOTS_MILLIS_REDUCES_BYTES = Bytes.toBytes(SLOTS_MILLIS_REDUCES | ||
.toLowerCase()); | ||
|
||
/** used to indicate how expensive a job is in terms of memory and time taken */ | ||
public static final String MEGABYTEMILLIS = "megabytemillis"; | ||
public static final byte[] MEGABYTEMILLIS_BYTES = Bytes.toBytes(MEGABYTEMILLIS.toLowerCase()); | ||
|
||
/** used to indicate the cost of a job is in terms of currency units */ | ||
public static final String JOBCOST = "jobcost"; | ||
public static final byte[] JOBCOST_BYTES = Bytes.toBytes(JOBCOST.toLowerCase()); | ||
|
||
public static final String JOB_DAILY_AGGREGATION_STATUS_COL = "daily_aggregation_status"; | ||
public static final byte[] JOB_DAILY_AGGREGATION_STATUS_COL_BYTES = | ||
Bytes.toBytes(JOB_DAILY_AGGREGATION_STATUS_COL); | ||
|
||
public static final String JOB_WEEKLY_AGGREGATION_STATUS_COL = "weekly_aggregation_status"; | ||
public static final byte[] JOB_WEEKLY_AGGREGATION_STATUS_COL_BYTES = | ||
Bytes.toBytes(JOB_WEEKLY_AGGREGATION_STATUS_COL); | ||
|
||
/** | ||
* number of retries for check and put | ||
*/ | ||
public static final int RETRY_COUNT = 100; | ||
|
||
/** | ||
* type of aggregation : daily, weekly | ||
*/ | ||
public enum AGGREGATION_TYPE { | ||
DAILY, | ||
WEEKLY; | ||
} | ||
} |
65 changes: 65 additions & 0 deletions
65
hraven-core/src/main/java/com/twitter/hraven/AppAggregationKey.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/* | ||
Copyright 2014 Twitter, Inc. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package com.twitter.hraven; | ||
|
||
/** | ||
* Represents the row key that stores Aggregations for an app | ||
*/ | ||
public class AppAggregationKey extends AppKey { | ||
|
||
/** | ||
* timestamp stored as part of row key in aggregation table | ||
* this is usually top of the day or top of the week timestamp | ||
* All apps that belong to that day (or that week for weekly aggregations) | ||
* have the same aggregation id | ||
* | ||
* If a {@link Flow} (like a pig job or a scalding job) spans more than a 1 day, | ||
* the aggregationId is the day that the first job in that Flow started running, | ||
* which is the submitTime or runId of that {@link Flow} | ||
*/ | ||
private long aggregationId; | ||
|
||
public AppAggregationKey(String cluster, String userName, String appId, Long ts) { | ||
super(cluster, userName, appId); | ||
this.setAggregationId(ts); | ||
} | ||
|
||
public long getAggregationId() { | ||
return aggregationId; | ||
} | ||
|
||
public void setAggregationId(long aggregationId) { | ||
this.aggregationId = aggregationId; | ||
} | ||
|
||
/** | ||
* Encodes the given timestamp for ordering by run ID | ||
*/ | ||
public static long encodeAggregationId(long timestamp) { | ||
return Long.MAX_VALUE - timestamp; | ||
} | ||
|
||
/** | ||
* Inverted version of {@link AppAggregationKey#getaggregationId()} used in the byte representation for | ||
* reverse chronological sorting. | ||
* @return | ||
*/ | ||
public long getEncodedAggregationId() { | ||
return encodeAggregationId(aggregationId); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.