在语义的世界里,可以近似地说:万事万物都是特征提取。你只要找到特征,事情就好办。……你期望毕其功于一役吗?自然语言处理的真实应用里是很难有什么场景找到一个通吃特征的。都是一层一层特征叠加的。一层特征去掉一部分垃圾数据。如此反复,终成正果。注意方法论。
统计粗且糙,乃大锤。规则细而精,乃小锤。先大场后细棋。
KafkaSink.java
|
import?kafka.javaapi.producer.Producer;
……
public class?KafkaSink?extends?AbstractSink?implements?Configurable {
……
?private?Producer<String,?byte[]> producer;
……
?@Override
?public?Status process()?throws?EventDeliveryException {
??Channel?channel = getChannel();
? Transaction tx = channel.getTransaction();
??try?{
? ?tx.begin();
? ?Event?e = channel.take();
? ?if?(e ==?null) {
? ? tx.rollback();
? ??return?Status.BACKOFF;
? ?}
? ?producer.send(new?KeyedMessage<String,?byte[]>(topic, e.getBody()));
? ?tx.commit();?
? ?return?Status.READY;
? }?catch?(Exception?e) {
|
KafkaSpout.java
|
public abstract class?KafkaSpout?implements?IRichSpout {
……
?@Override
?public?void?activate() {
……
? ?for?(final?KafkaStream<byte[], byte[]> stream : streamList) {
? ? executor.submit(new?Runnable() {
? ? ?@Override
? ? ?public void?run() {
? ? ? ConsumerIterator<byte[],?byte[]> iterator = stream.iterator();
? ? ??while?(iterator.hasNext()) {
? ? ? ?if?(spoutPending.get() <= 0) {
? ? ? ? sleep(1000);
? ? ? ??continue;
? ? ? ?}
? ? ? ?MessageAndMetadata<byte[],?byte[]> next = iterator.next();
? ? ? ?byte[] message = next.message();
? ? ? ?List<Object> tuple =?null;
? ? ? ?try?{
? ? ? ??tuple = generateTuple(message);
? ? ? ?}?catch?(Exception e) {
? ? ? ? e.printStackTrace();
? ? ? ?}
? ? ? ?if (tuple ==?null?|| tuple.size() != outputFieldsLength) {
? ? ? ??continue;
? ? ? ?}
? ? ? ?collector.emit(tuple);
? ? ? ?spoutPending.decrementAndGet();
? ? ? }
? ? ?}
|
EvaluateBolt.java
|
public class?EvaluateBolt?extends?BaseBasicBolt {
……
?@Override
?public void?execute(Tuple input, BasicOutputCollector collector) {
……
??if?(LogWebsiteSpout.PAGE_EVENT_BROWSE.equals(event)) {
? ?if?(LogWebsiteSpout.PAGE_TYPE_GOODS.equals(pageType)) {
? ? incrBaseStatistics(baseKeyMap, BROWSE_ALL, 1);
? ?}?else if?(LogWebsiteSpout.PAGE_TYPE_PAY1.equals(pageType)) {
? ? incrBaseStatistics(baseKeyMap, ORDER_ALL, 1);
? ?}
? ?String recDisplay = input.getStringByField(LogWebsiteSpout.FIELD_REC_DISPLAY);
? ?recDisplayStatistics(recDisplay, time, pageType, baseKeyMap);
? }?else if?(LogWebsiteSpout.PAGE_EVENT_CLICK.equals(event)) {
? ?String recType = input.getStringByField(LogWebsiteSpout.FIELD_REC_TYPE);
|
#研发解决方案介绍#Recsys-Evaluate(推荐评测)
原文:http://zhengyun-ustc.iteye.com/blog/2167054