Commit 265b180a authored by maliang

clean

parent 18a22379
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="clean" />
</profile>
</annotationProcessing>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ScalaProjectSettings">
<option name="customScalatestSyntaxHighlighting" value="true" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4" />
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.novel.clean</groupId>
<artifactId>clean</artifactId>
<version>1.0</version>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.2</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.2</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.2</version>
<!--<scope>provided</scope>-->
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.47</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
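<!-- build a fat jar (jar-with-dependencies) so the MapReduce job ships its dependencies to the cluster -->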
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package com.novel.clean;
import java.io.IOException;
import java.util.Map;
import com.novel.clean.util.CleanUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
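/**
* Map-only cleaning mapper: parses one raw access-log line into a
* tab-separated record and emits it with a NullWritable value.
*/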
public class CleanApi extends Mapper<LongWritable, Text, Text, NullWritable>{
private Text line = new Text();
private CleanUtil cleanUtil=new CleanUtil();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
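// Each raw line is expected to hold 34 "] ["-separated bracketed fields,
// e.g. (hypothetical): "[1.0] [novel] [10.0.0.1] [2019-10-13 08:00:00] ... [processParam]"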
String[] list = value.toString().split("] \\[");
String logversion = "-999";
String projectname = "-999";
String serverip = "-999"; // server IP
String reqbegintime = "-999"; // request time
String reqendtime = "-999"; // response time
String reqspendtime = "-999"; // elapsed time
String reqip = "-999"; // client IP
String requrlstr = "-999"; // request URL
String refer = "-999";
String method = "-999";
String httpcode = "-999";
String userid = "-999";
String sessionid = "-999";
String platformid = "-999";
String appversion = "-999";
String channelid = "-999";
String bookrecommend= "-999";
String payType = "-999";
String paynum = "-999";
String rechargetype = "-999";
String payamount = "-999";
String ssid = "-999";
String isnewuser = "-999";
String ischannelnewuser = "-999";
String readwordstype = "-999";
String readwords = "-999";
String mobile = "-999";
String uaplatform = "-999";
String uastat = "-999";
String apicode = "-999";
String xClient = "-999";
String ua = "-999";
String reqParam = "-999";
String processParam = "-999";
try {
logversion = list[0].split("\\[")[1];
projectname = list[1];
serverip = list[2]; // server IP
reqbegintime = list[3]; // request time
reqendtime = list[4]; // response time
reqspendtime = list[5]; // elapsed time
reqip = list[6]; // client IP
requrlstr = list[7]; // request URL
refer = list[8];
method = list[9];
httpcode = list[10];
userid = list[11];
sessionid = list[12];
platformid = list[13];
appversion = list[14];
channelid = list[15];
bookrecommend= list[16];
payType = list[17];
paynum = list[18];
rechargetype = list[19];
payamount = list[20];
ssid = list[21];
isnewuser = list[22];
ischannelnewuser = list[23];
readwordstype = list[24];
readwords = list[25];
mobile = list[26];
uaplatform = list[27];
uastat = list[28];
apicode = list[29];
xClient = list[30];
ua = list[31];
reqParam = list[32];
processParam = list[33];
} catch (Exception e) {
// malformed or truncated lines keep the -999 defaults
}
// filter download logs, ad-callback endpoints, and monitoring probes
if (requrlstr.contains("/api/download/") || requrlstr.contains("/api/other/jrttBack") || requrlstr.contains("/kuaishou/notify") || requrlstr.contains("/api/getPublicKey") || ua.contains("JianKongBao"))
return;
else if (requrlstr.contains("type=bbs")) {
// compare by value, not reference: == on strings was a bug here
if ("1".equals(platformid))
platformid = "100";
else if ("2".equals(platformid))
platformid = "200";
}
// resolve country and region from the client IP
Map<String, String> ips = cleanUtil.getIp(reqip);
String country = ips.get("country");
String detail_area = ips.get("detail_area");
String province=ips.get("province");
String city=ips.get("city");
// parse action and action_detail from the request URL
Map<String, String> actions = cleanUtil.getAction(requrlstr);
String action=actions.get("action");
String action_detail=actions.get("action_detail");
// parse bookId and chapterNum from the request parameters
Map<String, String> book=cleanUtil.getBook(reqParam);
String bookid=book.get("bookId");
String chapternum=book.get("chapterNum");
// three fields not used yet
String referurl="-999";
String refer_com="-999";
String referaction="-999";
// split xClient to extract device fields
String screensize="-999";
String type="-999";
String imsi="-999";
String version ="-999";
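// xClient is presumably a ";"-delimited key=value string; the keys split out
// below (ss, pm, imsi, version, mac, dID, imei, idfa) are what this code expects,
// e.g. (hypothetical): "ss=1080x1920;pm=MI6;imsi=460001234;version=5.2.1;..."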
try {
screensize = xClient.split("ss=")[1].split(";")[0];
type = xClient.split("pm=")[1].split(";")[0];
imsi = xClient.split("imsi=")[1].split(";")[0];
version = xClient.split("version=")[1].split(";")[0];
} catch (Exception e) {
// missing keys in xClient keep the -999 defaults
}
String appversion2="-999";
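// keep the segment between the first and last dot of the version,
// e.g. "5.2.1" -> "2"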
if (version !=null && version.indexOf(".")>0 ){
int first_index = version.indexOf('.') + 1;
int last_index = version.lastIndexOf('.');
appversion2 = version.substring(first_index,last_index);
}
String mac ="-999";
String device_id ="-999";
String imei ="-999";
String idfa ="-999";
try {
mac = xClient.split("mac=")[1].split(";")[0];
device_id = xClient.split("dID=")[1].split(";")[0];
imei = xClient.split("imei=")[1].split(";")[0];
idfa = xClient.split("idfa=")[1].split(";")[0];
} catch (Exception e) {
// missing keys in xClient keep the -999 defaults
}
// several fields not used yet
String endbrand = "-999";
String browserbrand = "-999";
String browserversion = "-999";
String primaryurlpath = "-999";
String others = "-999";
String cellid = "-999";
String projectType="-999";
// parse OS name and version out of the user agent
String os_version="-999";
String platform="api";
try {
os_version = ua.split(";")[1].split(" ")[2];
platform=ua.split(";")[1].split(" ")[1];
} catch (Exception e) {
// non-standard UA strings keep the -999 defaults
}
// log date (yyyy-MM-dd)
String day=reqbegintime.split(" ")[0];
line.set(logversion+"\t"+projectname+"\t"+serverip+"\t"+reqbegintime+"\t"+reqendtime+"\t"+reqspendtime+"\t"+reqip+"\t"+country
+"\t"+detail_area+"\t"+province+"\t"+city+"\t"+requrlstr+"\t"+action+"\t"+action_detail+"\t"+bookid+"\t"+chapternum+"\t"+referurl+"\t"+refer_com+"\t"+
referaction+"\t"+method+"\t"+httpcode+"\t"+userid+"\t"+sessionid+"\t"+platformid+"\t"+appversion+"\t"+channelid+"\t"+
bookrecommend+"\t"+payType+"\t"+paynum+"\t"+rechargetype+"\t"+payamount+"\t"+ssid+"\t"+isnewuser+"\t"+ischannelnewuser+"\t"+
readwordstype+"\t"+readwords+"\t"+mobile+"\t"+uaplatform+"\t"+uastat+"\t"+apicode+"\t"+imei+"\t"+os_version+"\t"+screensize
+"\t"+type+"\t"+imsi+"\t"+cellid+"\t"+version+"\t"+device_id+"\t"+mac+"\t"+endbrand+"\t"+browserbrand+"\t"+browserversion+"\t"+
ua+"\t"+primaryurlpath+"\t"+reqParam+"\t"+others+"\t"+processParam+"\t"+projectType+"\t"+idfa+"\t"+appversion2+"\t"+platform+"\t"+day);
context.write(line, NullWritable.get());
}
}
package com.novel.clean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class CleanDriver {
public static void main(String[] args) throws Exception{
if (args.length < 2) {
// local debug fallback; pass real HDFS paths in production
args = new String[]{"e:/input", "e:/output"};
}
// 1. Get job configuration
Configuration configuration = new Configuration();
Job job = Job.getInstance(configuration);
// 2. Set the jar by class
job.setJarByClass(CleanDriver.class);
// 3. Bind the mapper
job.setMapperClass(CleanApi.class);
// 4. Set map and final output key/value types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
// 5. Set input and output paths
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 6. Map-only job: set the number of reduce tasks to 0
job.setNumReduceTasks(0);
// 7. Submit and wait for completion
boolean result = job.waitForCompletion(true);
System.exit(result ? 0 : 1);
}
}
package com.novel.clean.util;
import com.alibaba.fastjson.JSONObject;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class CleanUtil {
public static QQWryIPUtil qqWryIPUtil;
public static IPZone ipZone;
static{
try {
qqWryIPUtil = new QQWryIPUtil();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Resolves an incoming IP into country, detail area, province, and city.
* @author maliang
*/
public Map<String,String> getIp(String ip){
Map<String,String> analysis=new HashMap<>();
try {
ipZone= qqWryIPUtil.findIP(ip);
Map<String, String> zone = QQWryIPUtil.addressResolution(ipZone.getMainInfo());
analysis.put("country",ipZone.getMainInfo());
analysis.put("detail_area",ipZone.getSubInfo());
analysis.put("province",zone.get("province") ==""?"-999":zone.get("province"));
analysis.put("city",zone.get("city") ==""?"-999":zone.get("city"));
}catch (Exception e){
analysis.put("country","-999");
analysis.put("detail_area","-999");
analysis.put("province","-999");
analysis.put("city","-999");
}
return analysis;
}
/**
* Parses the action and action_detail fields out of the request URL.
* @author maliang
*/
public Map<String,String> getAction(String requrlstr){
Map<String,String> analysis=new HashMap<>();
analysis.put("action","-999");
analysis.put("action_detail","-999");
String[] split = requrlstr.split("/");
try {
analysis.put("action",split[2]+"_"+split[3]);
analysis.put("action_detail",split[2]+"_"+split[3]);
} catch (Exception e) {
// URLs with fewer than four path segments keep the -999 defaults
}
return analysis;
}
/**
* Parses the bookId and chapterNum the user clicked out of reqParam.
* @author maliang
*/
public Map<String,String> getBook(String reqParam){
Map<String,String> analysis=new HashMap<>();
// pre-fill defaults so callers never see null when parsing fails
analysis.put("bookId","-999");
analysis.put("chapterNum","-999");
try{
JSONObject jsonObject = JSONObject.parseObject(reqParam);
analysis.put("bookId",(jsonObject.getString("bookId")==null)?"-999":jsonObject.getString("bookId"));
analysis.put("chapterNum",jsonObject.getString("chapterNum")==null?"-999":jsonObject.getString("chapterNum"));
}catch (Exception e){
// non-JSON reqParam keeps the -999 defaults
}
return analysis;
}
}
package com.novel.clean.util;
/**
* @author: xsj
* @date:2019/8/7
*/
public class IPZone {
private final String ip;
private String mainInfo = "";
private String subInfo = "";
public IPZone(String ip) {
this.ip = ip;
}
public String getIp() {
return this.ip;
}
public String getMainInfo() {
return this.mainInfo;
}
public String getSubInfo() {
return this.subInfo;
}
public void setMainInfo(String info) {
this.mainInfo = info;
}
public void setSubInfo(String info) {
this.subInfo = info;
}
public String toString() {
return this.mainInfo + this.subInfo;
}
}
package com.novel.clean.util;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author: xsj
* @date:2019/8/7
*/
public class QQWryIPUtil {
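// Named-group regex that decomposes a Chinese address string into
// province / city / county / town / village segments.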
public static final Pattern IPZonePattern = Pattern.compile("(?<province>[^省]+自治区|.*?省|.*?行政区|.*?市)(?<city>[^市]+自治州|.*?地区|.*?行政单位|.+盟|市辖区|.*?市|.*?县)(?<county>[^县]+县|.+区|.+市|.+旗|.+海域|.+岛)?(?<town>[^区]+区|.+镇)?(?<village>.*)");
private static class QIndex {
public final long minIP;
public final long maxIP;
public final int recordOffset;
public QIndex(final long minIP, final long maxIP, final int recordOffset) {
this.minIP = minIP;
this.maxIP = maxIP;
this.recordOffset = recordOffset;
}
}
private static class QString {
public final String string;
/** length including the \0 end byte */
public final int length;
public QString(final String string, final int length) {
this.string = string;
this.length = length;
}
}
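// qqwry.dat layout (as this reader assumes it): the file header holds the
// index range; each 7-byte index record is a 4-byte little-endian start IP
// plus a 3-byte offset to a location record, which may begin with a
// redirect byte (0x01 / 0x02) pointing at the actual GB18030 strings.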
private static final int INDEX_RECORD_LENGTH = 7;
private static final byte REDIRECT_MODE_1 = 0x01;
private static final byte REDIRECT_MODE_2 = 0x02;
private static final byte STRING_END = '\0';
private final byte[] data;
private final long indexHead;
private final long indexTail;
/**
* Create QQWry by loading qqwry.dat from classpath.
*
* @throws IOException
* if encounter error while reading qqwry.dat
*/
public QQWryIPUtil() throws IOException {
ClassLoader classLoader = QQWryIPUtil.class.getClassLoader();
final InputStream in = classLoader.getResourceAsStream("qqwry.dat");
if (in == null) {
throw new IOException("qqwry.dat not found on classpath");
}
final ByteArrayOutputStream out = new ByteArrayOutputStream(10 * 1024 * 1024); // 10MB
final byte[] buffer = new byte[4096];
try {
while (true) {
final int r = in.read(buffer);
if (r == -1) {
break;
}
out.write(buffer, 0, r);
}
} finally {
in.close();
}
data = out.toByteArray();
indexHead = readLong32(0);
indexTail = readLong32(4);
}
/**
* Create QQWry with provided qqwry.dat data.
*
* @param data
* fully read data from a qqwry.dat file.
*/
public QQWryIPUtil(final byte[] data) {
this.data = data;
indexHead = readLong32(0);
indexTail = readLong32(4);
}
/**
* Create QQWry from a path to qqwry.dat file.
*
* @param file
* path to qqwry.dat
* @throws IOException
* if encounter error while reading from the given file.
*/
public QQWryIPUtil(final Path file) throws IOException {
this(Files.readAllBytes(file));
}
public IPZone findIP(final String ip) {
final long ipNum = toNumericIP(ip);
final QIndex idx = searchIndex(ipNum);
if (idx == null) {
return new IPZone(ip);
}
return readIP(ip, idx);
}
private long getMiddleOffset(final long begin, final long end) {
long records = (end - begin) / INDEX_RECORD_LENGTH;
records >>= 1;
if (records == 0) {
records = 1;
}
return begin + (records * INDEX_RECORD_LENGTH);
}
private QIndex readIndex(final int offset) {
final long min = readLong32(offset);
final int record = readInt24(offset + 4);
final long max = readLong32(record);
return new QIndex(min, max, record);
}
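// reads a little-endian unsigned 24-bit integer at the given offset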
private int readInt24(final int offset) {
int v = data[offset] & 0xFF;
v |= ((data[offset + 1] << 8) & 0xFF00);
v |= ((data[offset + 2] << 16) & 0xFF0000);
return v;
}
private IPZone readIP(final String ip, final QIndex idx) {
final int pos = idx.recordOffset + 4; // skip ip
final byte mode = data[pos];
final IPZone z = new IPZone(ip);
if (mode == REDIRECT_MODE_1) {
final int offset = readInt24(pos + 1);
if (data[offset] == REDIRECT_MODE_2) {
readMode2(z, offset);
} else {
final QString mainInfo = readString(offset);
final String subInfo = readSubInfo(offset + mainInfo.length);
z.setMainInfo(mainInfo.string);
z.setSubInfo(subInfo);
}
} else if (mode == REDIRECT_MODE_2) {
readMode2(z, pos);
} else {
final QString mainInfo = readString(pos);
final String subInfo = readSubInfo(pos + mainInfo.length);
z.setMainInfo(mainInfo.string);
z.setSubInfo(subInfo);
}
return z;
}
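// reads a little-endian unsigned 32-bit integer at the given offset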
private long readLong32(final int offset) {
long v = data[offset] & 0xFFL;
v |= (data[offset + 1] << 8L) & 0xFF00L;
v |= ((data[offset + 2] << 16L) & 0xFF0000L);
v |= ((data[offset + 3] << 24L) & 0xFF000000L);
return v;
}
private void readMode2(final IPZone z, final int offset) {
final int mainInfoOffset = readInt24(offset + 1);
final String main = readString(mainInfoOffset).string;
final String sub = readSubInfo(offset + 4);
z.setMainInfo(main);
z.setSubInfo(sub);
}
private QString readString(final int offset) {
int i = 0;
final byte[] buf = new byte[128];
for (;; i++) {
final byte b = data[offset + i];
if (STRING_END == b) {
break;
}
buf[i] = b;
}
try {
return new QString(new String(buf, 0, i, "GB18030"), i + 1);
} catch (final UnsupportedEncodingException e) {
return new QString("", 0);
}
}
private String readSubInfo(final int offset) {
final byte b = data[offset];
if ((b == REDIRECT_MODE_1) || (b == REDIRECT_MODE_2)) {
final int areaOffset = readInt24(offset + 1);
if (areaOffset == 0) {
return "";
} else {
return readString(areaOffset).string;
}
} else {
return readString(offset).string;
}
}
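// binary search over the 7-byte index records between indexHead and indexTail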
private QIndex searchIndex(final long ip) {
long head = indexHead;
long tail = indexTail;
while (tail > head) {
final long cur = getMiddleOffset(head, tail);
final QIndex idx = readIndex((int) cur);
if ((ip >= idx.minIP) && (ip <= idx.maxIP)) {
return idx;
}
if ((cur == head) || (cur == tail)) {
return idx;
}
if (ip < idx.minIP) {
tail = cur;
} else if (ip > idx.maxIP) {
head = cur;
} else {
return idx;
}
}
return null;
}
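// converts a dotted-quad IPv4 string into its unsigned 32-bit numeric value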
private long toNumericIP(final String s) {
final String[] parts = s.split("\\.");
if (parts.length != 4) {
throw new IllegalArgumentException("ip=" + s);
}
long n = Long.parseLong(parts[0]) << 24L;
n += Long.parseLong(parts[1]) << 16L;
n += Long.parseLong(parts[2]) << 8L;
n += Long.parseLong(parts[3]);
return n;
}
/**
* Resolve a Chinese address string into its administrative parts.
* @author lin
* @param address
* @return map of province/city/county/town/village, or null if no match
*/
public static Map<String,String> addressResolution(String address){
Matcher m= IPZonePattern.matcher(address);
String province,city,county,town,village;
Map<String,String> row=null;
if (m.find()){
row= new LinkedHashMap<>();
province=m.group("province");
row.put("province", province==null?"":province.trim());
city=m.group("city");
row.put("city", city==null?"":city.trim());
county=m.group("county");
row.put("county", county==null?"":county.trim());
town=m.group("town");
row.put("town", town==null?"":town.trim());
village=m.group("village");
row.put("village", village==null?"":village.trim());
}
return row;
}
public static void main(String[] args) {
try {
QQWryIPUtil qqWryIPUtil = new QQWryIPUtil();
// QQWryIPUtil qqWryIPUtil = new QQWryIPUtil(Paths.get("/Users/wangkun/Applications/qqwry.dat"));
IPZone ip = qqWryIPUtil.findIP("117.136.45.68");
// IPZone ip = qqWryIPUtil.findIP("192.168.31.13");
System.out.println(ip.getMainInfo());
System.out.println(ip.getSubInfo());
}catch (Exception e){
System.out.println(e);
}
}
}
#!/bin/sh
#time 2017-06-07
################# Initialize variables #################
hadoop="/usr/bin/hadoop";
hdfs="/usr/bin/hdfs";
cleanpath="/home/hdfs/hadoop-hdfs/personas/clean";
if [ "$1" != "" ];then
date=`date -d "$1" +%Y-%m-%d`;
logdate=`date -d "$1" +%Y%m%d`;
else
date=`date -d "-1 day" +%Y-%m-%d`;
logdate=`date -d "-1 day" +%Y%m%d`;
fi
/bin/bash /home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api_android.sh $date
/bin/bash /home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api_android_lbb.sh $date
/bin/bash /home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api_android_kyy.sh $date
/bin/bash /home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api_ios.sh $date
/bin/bash /home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api_h5.sh $date
#!/bin/sh
#time 2017-06-07
################# Initialize variables #################
source /etc/bi.profile
hadoop="/usr/bin/hadoop";
hdfs="/usr/bin/hdfs";
cleanpath="/home/hdfs/hadoop-hdfs/personas/clean";
if [ "$1" != "" ];then
date=`date -d "$1" +%Y-%m-%d`;
logdate=`date -d "$1" +%Y%m%d`;
else
date=`date -d "-1 day" +%Y-%m-%d`;
logdate=`date -d "-1 day" +%Y%m%d`;
fi
year=${date:0:4}
month=${date:5:2}   # date is YYYY-MM-DD, so the month starts at offset 5
export MYSQL_BI_IP=10.90.8.32
export MYSQL_BI_USER=readtongji
export MYSQL_BI_PWD=uZpvSuDuEiEOwCtk21on
export MYSQL_BI_DB=db_bi
export MYSQL_BI_PORT=3306
function Delete(){
hadoop fs -rm -r -f /newread/read/log_access_txt/platform=android/day=${date}/
}
function Clean(){
cd ${cleanpath}
mark=`hadoop fs -ls /newread/logflag/android/${logdate}/*.lock | wc -l`
title="android日志检查"
content="${date}android日志上传成功"
content1="${date}android日志上传失败"
flag=0
if [ $mark -eq 1 ];then
/bin/python ${alert_path}/alert_hive.py "$title" "$content" "android"
flag=1
else
/bin/python ${alert_path}/alert_hive.py "$title" "$content1" "android"
fi
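# poll HDFS every 10 seconds until the upload .lock marker appears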
while [ $mark -eq 0 ];do
sleep 10
mark=`hadoop fs -ls /newread/logflag/android/${logdate}/*.lock | wc -l`
done
echo ${mark}
if [ $mark -eq 1 -a $flag -eq 0 ];then
/bin/python ${alert_path}/alert_hive.py "$title" "$content" "android"
elif [ $mark -eq 0 -a $flag -eq 0 ];then
/bin/python ${alert_path}/alert_hive.py "$title" "$content1" "android"
fi
hadoop jar /usr/hdp/2.6.5.0-292/hadoop-mapreduce/hadoop-streaming.jar -input /newread/logbak/android/${logdate}/*-access-*.log -output /newread/read/log_access_txt/platform=android/day=${date} -file /home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api.py -mapper "/home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api.py" -jobconf mapred.job.name="job_acclog_last_and"
#hadoop jar /usr/hdp/2.6.5.0-292/hadoop-mapreduce/hadoop-streaming.jar -inputformat com.hadoop.mapred.DeprecatedLzoTextInputFormat -input /newread/logbak/android/${logdate}/*-access-*.log.lzo -output /newread/read/log_access_txt/platform=android/day=${date} -file /home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api.py -mapper "/home/hdfs/hadoop-hdfs/personas/clean/clean_api/clean_api.py" -jobconf mapred.job.name="job_acclog_last_and" -jobconf mapred.output.compress=true -jobconf mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec
#check_result "clean_acclog_android"
}
function LoadHive(){
hive <<EOF
use read;
ALTER TABLE log_access_txt DROP IF EXISTS partition (platform='android',day='$date');
alter table log_access_txt add IF NOT EXISTS partition (platform='android',day='$date') location '/newread/read/log_access_txt/platform=android/day=$date';
exit;
EOF
exit;
}
function main(){
#hadoop fs -test -e /newread/logbak/novel/$logdate/access/
#while [ $? -ne 0 ];do
# sleep 3
# hadoop fs -test -e /newread/logbak/novel/$logdate/access/
#done
Delete
Clean
LoadHive
# insert into the compressed table read.log_access
#sh /home/hdfs/hadoop-hdfs/personas/clean/read_log_access.sh ${date}
}
main
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
package com.novel.test;
import com.novel.clean.util.CleanUtil;
import java.util.Map;
public class Test {
public static void main(String[] args) {
String reqParam="{\"sign\":\"985d1ecef2eabd7917c6b4369e15905c\",\"fileType\":\"1\",\"time\":\"1570766944214\",\"is_ad_version\":\"1\",\"source\":\"dialog2\",\"readChapterNum\":\"1\",\"bookId\":\"4198300\",\"chapterNum\":\"2\",\"userPath\":\"dialog,dialog2,C2\"}";
CleanUtil cleanUtil =new CleanUtil();
Map<String, String> book=cleanUtil.getBook(reqParam);
//System.out.println(book.get("bookId"));
//System.out.println(book.get("chapterNum"));
String ua="Mozilla/5.0 (Linux; Android 7.1.1; OD103 Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36\t-999";
String s1 = ua.split(";")[1].split(" ")[2];
System.out.print(s1);
}
}
#Generated by Maven
#Sun Oct 13 22:37:11 CST 2019
version=1.0
groupId=com.novel.clean
artifactId=clean