1. Approach

I recently built a website that needed to crawl product information from Tmall and Taobao. I first looked at the mobile pages, but found that they are rendered with React and was not sure whether they could be scraped, so I decided to crawl the data from the PC pages instead. However, the HTML fetched from the product URL does not contain the price, stock and similar information. On closer inspection, that data is loaded asynchronously from a separate interface, and that interface only returns data when the request carries a Referer header. Based on this, I wrote the simple crawler below, which fetches the product's preview images together with the price, stock and so on of its first SKU category.

2. Implementation

The code of the `crawlUrl` function is as follows:

($url) {import ('PhpQuery.Curl'); $curl=new (Curl); $result = $curl-> read ($url); $content = mb_convert_encoding ($result['content'],'UTF-8','UTF-8, GBK, GB2312, BIG5'); ($myres=array); if (($url, strrpos'taobao.com')! =false) {/ / match is under the frame of if (strpos ($content, 'this baby has the shelf')! ==false) {return false}; preg_match ("|itemId" (. *)'|isU, $content, $match); $item_id=$match[1]; preg_match ("|sellerId" (. *)'|isU ". $content, $match); $sellet_id=$match[1]; preg_match (|< title> < /title> (2) |isU, $content, $match); $title=$match[1]; / / $ch = curl_init inventory price information (); curl_setopt ($ch, CURLOPT_URL, itemId='.$item_id.'& sellerId='.$sellet_id.'&'https://detailskip.taobao.com/service/getData/1/p1/item/detail/sib.htm? Modules=dynStock, QRcode, viewer, price, duty, xmpPromotion, delivery, UPP, activity, FQG, zjys, amountRestriction, couponActivity, soldQuantity, originalPrice, tradeContract& callback=onSibRequestSuccess'); $opt[CURLOPT_HEADER]=false; $opt[CURLOPT_CONNECTTIMEOUT]=15; $opt[CURLOPT_TIMEOUT]=300; $opt[CURLOPT_AUTOREFERER]=true; $opt[CURLOPT_USERAGENT]='Mozilla/5.0 (Windows NT 6.1 AppleWebKit/536.11 (KHTML), like Gecko Chrome/20.0.1132.47 Safari/536.11'); curl_setopt_array ($ch, $opt); curl_setopt ($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt ($ch, CURLOPT_REFERER, $ URL); curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, false); $out_put=curl_exec ($ch); curl_close ($ch); $res=str_replace (('onSibRequestSuccess, $out_put ', ""); $res=rtrim ($res'); 1'; $result=json_decode ($RES), true); / / check out the picture information preg_match ('|< UL id= J_UlThumb > < /ul> (2) |isU', $content, $match); preg_match_all ('/< img data-src= "(. 
*?)" //', $match[1], $images); $myres['title']= str_replace ('taobao.com', '', $title); $myres['price']=current ($result['data']['originalPrice']); $myres['act_price']=current ($result['data']['promotion']['promoData']); $myres['stock']=$result['data']['dynStock']['stock']; $myres['banners']=$images[1]}else{; / /, whether under the frame of I F (strpos ($content, 'this baby has the shelf')! ==false) {return} $start=strpos (false; $url,'& id='); $item_id=substr ($url, $start+4,12); if (! Is_numeric ($item_id)) {$start=strpos ($url' id='? $end=strpos ($URL);'&, spm';); $item_id=substr ($url, $start+4, $end-$start-4);} preg_match (|< title> < /title> (2) |isU, $content, $match); $title=$match[1]; cachedTimestamp=1500562177777& queryMemberRight=true& $myurl='https://mdskip.taobao.com/core/initItemDetail.htm? CartEnable=true& offlineShop=false& addressLevel=2& itemId='.$item_id.'& tryBeforeBuy=false& isAreaSell=false& tmallBuySupport=true& isPurchaseMallPage= false& household=false& isForbidBuyItem=fals E& service3C=false& isRegionLevel=false& showShopProm=false& isSecKill=false& sellerPreview=false& isUseInventoryCenter=false& isApparel=true& callback=setMdskip& timestamp=1500562172109& isg=AiUlDZFWmP/ sMgVurQSILU3Ytet/Zdis& isg2=Ajk51JIhRFqKzxmiNPP6dkYxSKXT7iySkzSTeVtu9WDf4ll0o5Y9yKdyEtHu'; / / $ch = curl_init (stock price information); curl_setopt ($ch, CURLOPT_URL, $myurl); $opt[CURLOPT_HEADER]=false; $opt[CURLOPT_CONNECTTIMEOUT]=15; $opt[CURLOPT_TIMEOUT]=300; $opt[CURLOPT_AUTOREFERER]=true; $opt[CURLOPT_USERAGENT]='Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko Chrome/20.0.1132.47 Safari/536.11'; curl_setopt_array ($ch), $opt curl_setopt ($ch, CURLOP); T_RETURNTRANSFER, 1); curl_setopt ($ch, CURLOPT_REFERER, $url); curl_setopt ($ch, CURLOPT_SSL_VERIFYPEER, false); $out_put=curl_exec ($ch); curl_close ($ch); $res = mb_convert_encoding ($out_put,'UTF-8','UTF-8, GBK, GB2312, BIG5'); $res=str_replace ('setMdskip', "$res", $res=str_replace (' (); ', 
""); $res $res=str_replace (' '), "$res"); $result=json_decode ($res, true); "$nowk="; "$nowstore=" ($result['defaultModel']['inventoryDO']['skuQuantity'] as; foreach $k=> $val) {$nowk=$k; $nowstore=$val; break;} $myres['title']=str_replace ('-tmall.com Tmall', '', $title); $myres['price']=$result['defaultModel']['itemPriceResultDO'] ['priceInfo'][$nowk]['price'] $myres['act_pr; Ice']=isset ($result['defaultModel']['itemPriceResultDO']['priceInfo'][$nowk]['suggestivePromotionList'])? $result['defaultModel']['itemPriceResultDO']['priceInfo'][$nowk]['suggestivePromotionList']: $result['defaultModel']['itemPriceResultDO']['priceInfo'][$nowk]; $myres['stock']=$result['defaultModel']['inventoryDO']['totalQuantity']? $result['defaultModel']['inventoryDO']['totalQuantity']: $nowstore['quantity']; / / check out the picture information preg_match ('|< UL id= "J_UlThumb" > (. *); < /ul> |isU', $content, $match); preg_match_all ('/< img src= "(. *?)" //', $match[1], $images $myres['banners']=$images[1]; return $myres);};}

The code imports the Curl class from the phpQuery library, but phpQuery itself is not actually used; plain cURL is enough. The data that was crawled can be inspected by looking at the returned result. The function accepts both Taobao and Tmall links without the caller having to tell them apart, provided that they are PC links. Also, the regular expressions are not written very rigorously, so you may want to rewrite them yourself to match the data you need.

That is all for this article. I hope it helps you, and I hope you will continue to support Script Home.

This paper fixed link:http://www.script-home.com/php-crawling-tmall-and-taobao-commodity-data.html | Script Home | +Copy Link

Article reprint please specify:PHP crawling Tmall and Taobao commodity data | Script Home

You may also be interested in these articles!