爬虫扩展:querylist
环境
PHP >= 7.1 !!!
安装
1
composer require jaeger/querylist
后端
1 |
composer require jaeger/querylist |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
// 爬虫 public function crawler() { if(request()->post()){ $url = $this->param['url']; $range = $this->param['range']; $pageS = $this->param['pageS']; $pageE = $this->param['pageE']; $rules = []; foreach($this->param['content'] as $v){ if(!empty($v['field']) && !empty($v['tag']) && !empty($v['val'])) $rules[$v['field']] = [$v['tag'], $v['val']]; } // 循环从开始到结束每页数据 $addVals = []; for($i = $pageS; $i <= $pageE; $i++){ $urls = str_replace('#', $i, $url); $ql = QueryList::rules($rules)->range($range); $data = $ql->get($urls)->queryData(); foreach($data as $v){ $addVals[] = $v; } $ql->destruct(); } // 爬取详情页 foreach($addVals as $k => $v){ $info = $this->param['info']; $info_data = QueryList::get(trim($v['source']))->rules([ $info['field'] => [$info['tag'], $info['val']] ])->queryData(); $addVals[$k][$info['field']] = $info_data[$info['field']]; // 这里写入库 } QueryList::destructDocuments(); } return $this->fetch(); } |