爬取 携程
主页面 url:http://flights.ctrip.com/booking/BJS-KMG-day-1.html?DDate1=2017-09-22
获取数据 url:http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=c802fde883e64a3c8fba0e99e45d023c&rk=2.7090996922689414192821&CK=1D7795E4EB96B7578AA673D6EF346C33&r=0.2337708871934405872019
首次搜索
http://flights.ctrip.com/domestic/Search/FirstRoute/?ddate1=2017-09-22&ddate2=2017-09-22&dcity1=BJS&acity1=KMG
http://flights.ctrip.com/domestic/booking/BJS-KMG---D-adu-1/?dayoffset=1&ddate1=2017-09-22&ddate2=2017-09-22
重新搜索
http://flights.ctrip.com/booking/bjs-kmg---d-adu-1/?ddate1=2017-09-22&ddate2=2017-09-22 往返
http://flights.ctrip.com/booking/BJS-KMG-day-1.html?DDate1=2017-09-22 单程
往返
var url = "//flights.ctrip.com/domesticsearch/search/SearchRoundRecommend?DCity1=BJS&ACity1=KMG&SearchType=D&DDate1=2017-09-22&ACity2=BJS&DDate2=2017-09-22&IsNearAirportRecommond=0&LogToken=3c10647ccad04412a33567f25335875a&CK=97017DBB47A37AFE6BE4D99105F78AFE";
单程
var url = "//flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=66c9c383c12f4dc685a9ad3e5834a7c4&CK=D6BF57FFFC0A4E89AF51CF7186A1944E";
直接请求返回 json 的 url,把响应解析后就能得到航班信息。这个 url 里有 4 个参数(LogToken、rk、CK、r),需要从搜索主页面的源码里获取
单程和往返的原理一样,就是往返多了一个返回日期,其他都一样
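LogToken 不变;r 不变;CK 有变化:页面脚本会把 CK 里的一个字符挪到另一个位置(下面示例页面的脚本是把下标 6 的字符移到最前面,例如 D7795E14EB… 变成 1D7795E4EB…;不同页面挪动的位置可能不同,以实际页面脚本为准);rk 由 Math.random()*10 再拼接页面里写死的字符串(示例中是 '192821')得到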
LogToken
rk
CK
r 在主页面的第34行
LogToken 和 CK 可以从主页面第 24 行的 var url = "..." 里得到;r 值是主页面第 34 行 ajaxRequest(...) 调用的第二个参数(行号以实际抓到的页面源码为准,下面的页面节选省略了部分内容)
获取的json数据的url,爬取数据用的url
单程
http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=c96a69a7ca9142be9f0e3192f44f9f07&rk=1.445840834132689163412&CK=6A53EEBD27EBBE6785AF7951EC03EB1E&r=0.3795051054613884121115
往返
http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=D&DDate1=2017-09-22&ACity2=BJS&DDate2=2017-09-22&IsNearAirportRecommond=0&LogToken=584465ec999045db9a546e1ce6a04a3f&rk=4.057381591787214165342&CK=D76BF5FFFC0A4E89AF51CF7186A1944E&r=0.543402675163338674414
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Cache-Control" content="no-transform " />
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<meta name="description" content="携程旅行网为您提供北京到昆明特价机票以及北京到昆明航班查询。携程旅行网于2003年在美国纳斯达克上市,拥有覆盖全国的服务网络。提供国内国际各大航空公司的航线航班,安全支付值得信赖,是您网上订购北京到昆明机票的首选。免费咨询800-820-6666。" />
<meta name="keywords" content="北京到昆明特价,北京到昆明机票预订,北京到昆明航班查询,携程机票" />
<title>北京到昆明机票预订 - 北京到昆明特价机票 - 北京到昆明航班查询预订 - 携程国内机票预订</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
<link rel="dns-prefetch" href="//webresource.c-ctrip.com" />
<link rel="dns-prefetch" href="//pic.c-ctrip.com" />
<link rel="dns-prefetch" href="//images3.c-ctrip.com" />
<link rel="dns-prefetch" href="//crm.ws.ctrip.com" />
<link rel="dns-prefetch" href="//s.c-ctrip.com" />
<link rel="dns-prefetch" href="//www.google-analytics.com" />
<link rel="canonical" href="//flights.ctrip.com/domestic/booking/BJS-KMG-day-1.html"/>
<link href="//webresource.c-ctrip.com/ResFlightOnline/R2/Booking/css/fltdomestic111027/searchresult_v2.1.css?ReleaseNo=CR_2017_09_20_21_00_00" type="text/css" rel="stylesheet" />
</head>
<body class="gray_body">
<script type="text/javascript">
var url = "//flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=c802fde883e64a3c8fba0e99e45d023c&CK=D7795E14EB96B7578AA673D6EF346C33";
var _searchCount_c = 0;
function ajaxRequest(n,t){var i=null,e,f,l,o,s,r,c,u,h;if(typeof XMLHttpRequest!="undefined")i=new XMLHttpRequest;else if(typeof ActiveXObject!="undefined"){if(typeof arguments.callee.aciveXString!="string")for(e=["MSXML2.XMLHttp.6.0","MSXML2.XMLHttp.3.0","MSXML2.XMLHttp"],f=0,l=e.length;f<l;f++)try{i=new ActiveXObject(e[f]);arguments.callee.activeXString=e[f];break}catch(a){}i==null&&(i=new ActiveXObject(arguments.callee.activeXString))}i.onreadystatechange=function(){try{if(i.readyState==4)if(i.status>=200&&i.status<300||i.status==304){var r=eval("("+i.responseText+")");if(_searchCount_c==0&&r&&r.Error&&(r.Error.Code==104||r.Error.Code==1004)&&(r.Error.Message==""||!r.Error.Message)){_searchCount_c++;setTimeout(function(){var i=n.split("&");i.pop();ajaxRequest(i.join("&")+"&rt="+Math.random()*1e3,t)},1e3);return}jsonCallback.done(r)}else i.status!=0&&jsonCallback.onError()}catch(u){jsonCallback.onError()}};window.location.hash&&(o=window.location.hash.match(/DDate1=\d{4}-\d{2}-\d{2}/),o&&o.length>0&&(n=n.replace(/DDate1=(\d{4}-\d{2}-\d{2})/ig,o[0])),s=window.location.hash.match(/DDate2=\d{4}-\d{2}-\d{2}/),s&&s.length>0&&(n=n.replace(/DDate2=(\d{4}-\d{2}-\d{2})/ig,s[0])));r=n.replace(/^[\s\xA0]+|[\s\xA0]+$/g,"");(r.indexOf("ClassType=CF")==-1||r.indexOf("ClassType=&")!=-1)&&(r+=getStorage("FD_SearchPage_onlyCf")=="CF"?"&ClassType=CF":"");_searchCount_c>0&&(c=t.split(".")[1],t="0."+c.substring(1,c.length-1));u=r.split("&");h=r.indexOf("rk=")>=0||r.indexOf("rt=")>=0?u.splice(u.length-2,1)[0]:u.pop();u.push("CK=");h=h.split("=")[1];var fn=(function(u,r,k,t){var Z21=1,M21Z=1;Z21=Z21+=parseInt(Math.cos(7) * 0xa);Z21=Z21-=parseInt(Math.tan(7) * 0xa);if(Z21<0)Z21=-Z21; while(Z21>30)Z21=Z21%10;M21Z=M21Z+=parseInt(Math.sin(6) * 0xa);M21Z=M21Z+=parseInt(Math.cos(6) * 0xa);M21Z=M21Z*=parseInt(Math.log(6) * 0xa);if(M21Z<0)M21Z=-M21Z; while(M21Z>30)M21Z=M21Z%10;(function(r,u,x,y,t,k){if(!window.location.href){return;}var l=r.split(''); var 
c=l.splice(y,1);l.splice(x,0,c);t.open('GET', u.join('&')+l.join('') + '&r=' + k, !0);t.send(null);})(r,u,Z21,M21Z,t,k)});fn(u,h,t,i)}var jsonCallback={isError:!1,isReady:!1,data:{},readyList:[],errorList:[],ready:function(n){this.isReady==!1?this.readyList.push(n):n(this.data)},done:function(n){this.isReady=!0;this.data=n;for(var t=0;this.readyList[t];)this.readyList[t](n),t++},error:function(n){this.isError==!1?this.errorList.push(n):n()},onError:function(){this.isError=!0;for(var n=0;this.errorList[n];)this.errorList[n](),n++}},getStorage=function(n){var i,r,t;try{if(i="{}",window.localStorage)i=localStorage.getItem("jStorage");else if(window.globalStorage)i=window.globalStorage[window.location.hostname];else{r=document.head||document.getElementsByTagName("head")[0];t=document.createElement("link");t.style.behavior="url(#default#userData)";r.appendChild(t);try{t.load("jStorage")}catch(u){t.setAttribute("jStorage","{}");t.save("jStorage");t.load("jStorage")}i=t.getAttribute("jStorage")||"{}";r.removeChild(t)}return!i||i=="{}"?"":eval("("+i+")")[n]}catch(f){return""}}
var searchRouteIndex = "0";
var isCivil = false;
var roundTripCombinationSwitch = false;
ajaxRequest(url + '&rk=' + Math.random()*10+'192821','0.2337708871934405872019');
</script>
<div>
中间省略
</div>
</body>
</html>
/**
 * Issues the asynchronous GET request for the flight-search JSON and hands the
 * parsed result to `jsonCallback`.
 *
 * Before sending, the URL is rewritten: dates found in `location.hash` replace
 * the ones in the URL, an optional `ClassType=CF` is appended, the `CK` value is
 * taken out, one of its characters is moved to a different position, and the
 * scrambled value is re-attached at the end followed by `&r=<t>`.
 *
 * Relies on names defined outside this function: `_searchCount_c`,
 * `jsonCallback`, `getStorage` and the browser `window` object.
 *
 * @param {string} n Request URL containing a `CK=` parameter (the page calls
 *     this with `url + '&rk=' + ...`).
 * @param {string} t Decimal string that is sent as the value of the `r`
 *     query parameter.
 */
function ajaxRequest(n, t) {
var i = null, e, f, l, o, s, r, c, u, h;
// Create the transport: native XMLHttpRequest when available, otherwise try the
// legacy ActiveX ProgIDs in order.
// NOTE(review): the property tested below is spelled `aciveXString`, while the
// one assigned inside the loop is `activeXString`, so this test never sees the
// cached ProgID and the loop runs on every call.
if (typeof XMLHttpRequest != "undefined") i = new XMLHttpRequest; else if (typeof ActiveXObject != "undefined") {
if (typeof arguments.callee.aciveXString != "string") for (e = ["MSXML2.XMLHttp.6.0", "MSXML2.XMLHttp.3.0", "MSXML2.XMLHttp"], f = 0, l = e.length; f < l; f++) try {
i = new ActiveXObject(e[f]);
arguments.callee.activeXString = e[f];
break
} catch (a) {
}
i == null && (i = new ActiveXObject(arguments.callee.activeXString))
}
i.onreadystatechange = function () {
try {
// Only act on a completed request whose status is 2xx or 304.
if (i.readyState == 4) if (i.status >= 200 && i.status < 300 || i.status == 304) {
// The response body is evaluated as a JavaScript expression, not parsed as strict JSON.
var r = eval("(" + i.responseText + ")");
// One-time retry: on the first attempt only, an error code of 104 or 1004 with
// an empty message triggers a second request one second later.
if (_searchCount_c == 0 && r && r.Error && (r.Error.Code == 104 || r.Error.Code == 1004) && (r.Error.Message == "" || !r.Error.Message)) {
_searchCount_c++;
setTimeout(function () {
// Drop the last query parameter of the original URL and append a random `rt` instead.
var i = n.split("&");
i.pop();
ajaxRequest(i.join("&") + "&rt=" + Math.random() * 1e3, t)
}, 1e3);
return
}
jsonCallback.done(r)
} else i.status != 0 && jsonCallback.onError()
} catch (u) {
// Any exception while handling the response (including a failed eval) is reported as an error.
jsonCallback.onError()
}
};
// If the page hash carries DDate1/DDate2 values, they override the dates in the URL.
window.location.hash && (o = window.location.hash.match(/DDate1=\d{4}-\d{2}-\d{2}/), o && o.length > 0 && (n = n.replace(/DDate1=(\d{4}-\d{2}-\d{2})/ig, o[0])), s = window.location.hash.match(/DDate2=\d{4}-\d{2}-\d{2}/), s && s.length > 0 && (n = n.replace(/DDate2=(\d{4}-\d{2}-\d{2})/ig, s[0])));
// Trim leading/trailing whitespace (including non-breaking spaces) from the URL.
r = n.replace(/^[\s\xA0]+|[\s\xA0]+$/g, "");
// Append "&ClassType=CF" when the stored "FD_SearchPage_onlyCf" value is "CF" and
// the URL does not already carry a non-empty ClassType=CF.
(r.indexOf("ClassType=CF") == -1 || r.indexOf("ClassType=&") != -1) && (r += getStorage("FD_SearchPage_onlyCf") == "CF" ? "&ClassType=CF" : "");
// On the retry, `t` is rebuilt from its fractional digits with the first and last digit removed.
_searchCount_c > 0 && (c = t.split(".")[1], t = "0." + c.substring(1, c.length - 1));
u = r.split("&");
// Take one "key=value" piece out of the parameter list: the second-to-last piece
// when the URL contains rk= or rt=, otherwise the last piece. For a URL ending in
// "...&CK=<value>&rk=<value>" this is the CK parameter.
h = r.indexOf("rk=") >= 0 || r.indexOf("rt=") >= 0 ? u.splice(u.length - 2, 1)[0] : u.pop();
// Re-add an empty "CK=" at the end; the scrambled value is concatenated after it below.
u.push("CK=");
h = h.split("=")[1];
// Parameter mapping for the call `fn(u, h, t, i)` below:
// u = parameter list, r = extracted CK value, k = value for `r=`, t = the request object.
var fn = (function (u, r, k, t) {
// Obfuscated constants. With parseInt truncation these evaluate to
// Z21 = 1 + 7 - 8 = 0 and M21Z = (1 - 2 + 9) * 17 = 136 -> 136 % 10 = 6.
var Z21 = 1, M21Z = 1;
Z21 = Z21 += parseInt(Math.cos(7) * 0xa);
Z21 = Z21 -= parseInt(Math.tan(7) * 0xa);
if (Z21 < 0) Z21 = -Z21;
while (Z21 > 30) Z21 = Z21 % 10;
M21Z = M21Z += parseInt(Math.sin(6) * 0xa);
M21Z = M21Z += parseInt(Math.cos(6) * 0xa);
M21Z = M21Z *= parseInt(Math.log(6) * 0xa);
if (M21Z < 0) M21Z = -M21Z;
while (M21Z > 30) M21Z = M21Z % 10;
// Here x = Z21 (insert position) and y = M21Z (position of the character to move).
(function (r, u, x, y, t, k) {
// Nothing is sent when window.location.href is empty.
if (!window.location.href) {
return;
}
// Move the character at index y of the CK value to index x.
// `c` is the one-element array returned by splice; it is inserted as a nested
// array and turns back into its single character when joined.
var l = r.split('');
var c = l.splice(y, 1);
l.splice(x, 0, c);
// Final URL: "<params>&CK=" + scrambled CK + "&r=" + k, sent as an asynchronous GET.
t.open('GET', u.join('&') + l.join('') + '&r=' + k, !0);
t.send(null);
})(r, u, Z21, M21Z, t, k)
});
fn(u, h, t, i)
}
/**
 * One-shot notifier for the search response.
 *
 * Callbacks registered with `ready` / `error` before the outcome is known are
 * queued; once `done` / `onError` has run, later registrations are invoked
 * immediately (`ready` callbacks receive the stored data).
 */
var jsonCallback = {
    isError: false,
    isReady: false,
    data: {},
    readyList: [],
    errorList: [],

    /** Registers a success callback, or calls it right away when data has arrived. */
    ready: function (n) {
        if (this.isReady == false) {
            this.readyList.push(n);
        } else {
            n(this.data);
        }
    },

    /** Stores the response and runs the queued success callbacks in order. */
    done: function (n) {
        this.isReady = true;
        this.data = n;
        var position = 0;
        while (this.readyList[position]) {
            this.readyList[position](n);
            position++;
        }
    },

    /** Registers a failure callback, or calls it right away when an error already occurred. */
    error: function (n) {
        if (this.isError == false) {
            this.errorList.push(n);
        } else {
            n();
        }
    },

    /** Marks the request as failed and runs the queued failure callbacks in order. */
    onError: function () {
        this.isError = true;
        var position = 0;
        while (this.errorList[position]) {
            this.errorList[position]();
            position++;
        }
    }
};

/**
 * Reads one entry from the serialized "jStorage" blob.
 *
 * The blob is looked up in `localStorage`, then in `globalStorage` (keyed by
 * hostname), and finally through an IE `userData` behavior attached to a
 * temporary <link> element.
 *
 * @param {string} n Key to read from the stored object.
 * @returns {*} The stored value, or "" when the blob is missing, equals "{}",
 *     or anything throws along the way.
 */
var getStorage = function (n) {
    var serialized;
    var headElement;
    var linkElement;
    try {
        serialized = "{}";
        if (window.localStorage) {
            serialized = localStorage.getItem("jStorage");
        } else if (window.globalStorage) {
            serialized = window.globalStorage[window.location.hostname];
        } else {
            headElement = document.head || document.getElementsByTagName("head")[0];
            linkElement = document.createElement("link");
            linkElement.style.behavior = "url(#default#userData)";
            headElement.appendChild(linkElement);
            try {
                linkElement.load("jStorage");
            } catch (loadFailure) {
                // No saved store yet: create an empty one, then load it.
                linkElement.setAttribute("jStorage", "{}");
                linkElement.save("jStorage");
                linkElement.load("jStorage");
            }
            serialized = linkElement.getAttribute("jStorage") || "{}";
            headElement.removeChild(linkElement);
        }
        if (!serialized || serialized == "{}") {
            return "";
        }
        // The blob is evaluated as a JavaScript expression, exactly as the page does.
        return eval("(" + serialized + ")")[n];
    } catch (failure) {
        return "";
    }
};
数据解析
├─ als 航空公司 {MU: "东方航空", ZH: "深圳航空", CA: "中国国航"}
├─ apb 机场 {TYN12: "武宿国际机场T2", PEK2: "首都国际机场T2", YCU1169: "关公机场", PEK3: "首都国际机场T3", NKG987: "禄口国际机场T2"}
├─ fis
├─ acc 到达城市代码 YCU
├─ acn 到达城市名 运城
├─ alc 航空公司代码 CA
├─ apbn 到达机场名 关公机场
├─ apc 到达机场代码 YCU
├─ asmsn 航站楼
├─ at 到达时间 2017-10-20 13:50:00
├─
├─ confort 里面有历史准点率
├─ ArrivedBridge
├─ BoardingWay
├─ DepartBridge
├─ HistoryPunctuality
├─ HistoryPunctualityArr
├─ SubClassList
├─
├─
├─ dcc 出发城市代码 BJS
├─ dcn 出发城市名 北京
├─ dpbn 出发机场名 首都国际机场T3
├─ dpc 出发机场代码 PEK
├─ dsmsn 航站楼
├─ dt 出发时间 2017-10-20 11:55:00
├─ fn 航班号 CA1237
├─ lcfp 票价
├─ lp 实际票价
├─ pr 准点率 96.6100006103516
├─ rt 折扣
├─ sdft
├─ tax 税
├─ lps 半年内 每天的机票价格
├─ tf 中转组合
├─ Routes
├─ 0
├─ fis
├─
├─ tcn
├─ at
├─ c
├─ cn
├─ desc
├─ dt
├─ fcrtp
├─ fut
├─ icp
├─ ics
├─ imre
├─ ire
├─ k
├─
References
[1] 用Python抓取携程网机票信息 过程纪实(上篇)
[2] 用Python抓取携程网机票信息 过程纪实(下篇)
[3] get-ctrip-data
[4] post-803