Friday, November 11, 2016

ToDate Format in Pig

"EEE MMM dd HH:mm:ss z yyyy"

"yyyy-MM-dd"
"yyyy/MM/dd"
"yyyy.MM.dd"
       
"MM/dd/yyyy"
"MM-dd-yyyy"
"MM.dd.yyyy"

"dd-MM-yyyy"
"dd/MM/yyyy"
"dd.MM.yyyy"
       
       
  "M/dd/yyyy"
"dd.M.yyyy"
"M/dd/yyyy hh:mm:ss a"
"dd.M.yyyy hh:mm:ss a"
"dd.MMM.yyyy"
"dd-MMM-yyyy"
"yyyy/dd/MM"

-- Count messages per sender date.
-- Pipeline: load tab-separated records -> pull "d MMM yyyy" out of the raw
-- senderdate string -> normalise to ISO (yyyy-MM-dd) -> group and count.
--
-- NOTE(review): CustomFormatToISO is not a Pig built-in; it presumably comes
-- from piggybank (evaluation.datetime.convert) and must be registered /
-- on the UDF import list before this script runs -- confirm.

data = LOAD 'part-r-00000.bz2'
    USING PigStorage('\t')
    AS (
        yuid: chararray,
        domain: chararray,
        templateid: int,
        messageid: chararray,
        senderdate: chararray,
        template: chararray
    );

-- Capture group 3 is "<day> <month> <year>", optionally preceded by a
-- "<weekday>," prefix. The day is one or two digits; the previous pattern
-- ([1-9]+) rejected any day containing a zero (10, 20, 30, 01-09).
toISOA = FOREACH data GENERATE
    yuid,
    REGEX_EXTRACT(senderdate, '((.*)\\,)?\\s([0-9]{1,2}\\s(.*)\\s(19|20)\\d{2})(.*)', 3) AS sdate;

-- Drop rows where nothing was extracted (null) or the match is too short to be
-- a "d MMM yyyy" date. SIZE on a chararray returns its character count.
toISOA1 = FILTER toISOA BY SIZE(sdate) >= 10;

-- "d MMM yyyy" -> "d-MMM-yyyy" so it matches the format string used below.
toISOA2 = FOREACH toISOA1 GENERATE
    yuid,
    REPLACE(sdate, ' ', '-') AS sdate1;

-- Convert to an ISO timestamp and keep only the first 10 characters (the date part).
toISOA3 = FOREACH toISOA2 GENERATE
    yuid,
    SUBSTRING(CustomFormatToISO(sdate1, 'dd-MMM-yyyy'), 0, 10) AS mail_date;

by_mail_date = GROUP toISOA3 BY mail_date;

-- One output row per mail_date with the number of records in that group.
by_mail_date1 = FOREACH by_mail_date GENERATE
    group,
    COUNT(toISOA3) AS count;

STORE by_mail_date1 INTO 'by_mail_date1';

No comments:

Post a Comment

Blog Archive