当前位置：首页 > news >正文

网站地图设计深圳公明网站制作

news 2026/4/16 8:02:42

网站地图设计,深圳公明网站制作,网站制作制作,南通电子商务网站建设 ⭐️⭐️⭐️⭐️⭐️欢迎来到我的博客⭐️⭐️⭐️⭐️⭐️ 🐴作者：秋无之地 🐴简介：CSDN爬虫、后端、大数据领域创作者。目前从事python爬虫、后端和大数据等相关工作，主要擅长领域有：爬虫、后端、大数据… ⭐️⭐️⭐️⭐️⭐️欢迎来到我的博客⭐️⭐️⭐️⭐️⭐️ 作者：秋无之地 简介：CSDN爬虫、后端、大数据领域创作者。目前从事python爬虫、后端和大数据等相关工作，主要擅长领域有：爬虫、后端、大数据开发、数据分析等。欢迎小伙伴们点赞、收藏⭐️、留言、关注，关注必回关。 一、确定目标数据 1、先打开目标网站，找到目标数据所在的页面，点击“逛店铺”。 2、找到目标数据所在的api或页面：通过f12打开调试模式，通过搜索关键词找到关键词所在的api或页面。 3、观察请求参数：（1）请求参数有sign和token加密参数；（2）翻页：position参数变动了，1_0_0表示第一页，2_0_0表示第二页。 二、请求接口：使用requests库请求接口，返回数据。 def get_shop_list(self,per10,position1_0_0):获取店铺列表:param per:每页展示条数:param position:开始位置:return:try:url self.uri /druggmp/index/shopListparams {traderName:yaoex_pc,trader:pc,closesignature:yes,timestamp:int(time.time()*1000),}data {traderName:yaoex_pc,trader:pc,closesignature:yes,timestamp:int(time.time()*1000),token:self.token,queryAll:yes,isSearch:yes,per:per,position:position,}self.log_.info(f入参{data})resp requests.post(url,headersself.header,paramsparams,datadata).json()self.log_.info(f出参数量{len(resp[data][shopList])})return resp[data][shopList]except Exception as e:self.log_.error(str(e))return [] 三、数据解析：将返回的数据进行正则匹配，然后通过遍历提取目标数据。 获取店铺列表 shop_list self.get_shop_list(per10,positionposition) if not len(shop_list):self.log_.info(已经爬完结束)break #遍历店铺 for shop_ in shop_list:#店铺idshop_id shop_[enterpriseId]#店铺名称shop_name shop_[shopName]#店铺logologo shop_[logo]#是否自营self_str shop_[shopExtTypeText]if self_str and self_str自营:is_self 1else:is_self 0#城市if shipAddress in shop_:city shop_[shipAddress]else:city 四、数据存储：数据解析后，对数据进行拼接，然后持久化存在csv文件。 sql freplace into yyc_shop(shop_id,shop_name,logo,shelves,is_self,biz_code,biz_url,yao_url,qs_url,official_name,province,city) values({shop_id},{shop_name},{logo},{shelves},{is_self},{biz_code},{biz_url},{yao_url},{qs_url},{official_name},{province},{city}) self.log_.info(f插入sql{sql}) self.base_.mysql_data(sql) 文件内容： 五、完整代码：完整代码如下： def get_shop_list(self,per10,position1_0_0):获取店铺列表:param per:每页展示条数:param position:开始位置:return:try:url self.uri /druggmp/index/shopListparams 
{traderName:yaoex_pc,trader:pc,closesignature:yes,timestamp:int(time.time()*1000),}data {traderName:yaoex_pc,trader:pc,closesignature:yes,timestamp:int(time.time()*1000),token:self.token,queryAll:yes,isSearch:yes,per:per,position:position,}self.log_.info(f入参{data})resp requests.post(url,headersself.header,paramsparams,datadata).json()self.log_.info(f出参数量{len(resp[data][shopList])})return resp[data][shopList]except Exception as e:self.log_.error(str(e))return []获取店铺列表 shop_list self.get_shop_list(per10,positionposition) if not len(shop_list): self.log_.info(已经爬完结束) break #遍历店铺 for shop_ in shop_list: #店铺id shop_id shop_[enterpriseId] #店铺名称 shop_name shop_[shopName] #店铺logo logo shop_[logo] #是否自营 self_str shop_[shopExtTypeText] if self_str and self_str自营:is_self 1 else:is_self 0 #城市 if shipAddress in shop_:city shop_[shipAddress] else:city 获取店铺上架数 shelves self.get_shop_drug_count(shop_idshop_id)获取店铺证件 shop_info self.get_shopcert(shop_idshop_id) #地址 address shop_info[data][baseInfo][address] #省份 try:if city and city in address:province address.split(city)[0]else:provs address.split(省)province provs[0]city provs[1].split(市)[0] except:province #供应商全称 official_name shop_info[data][baseInfo][enterpriseName] #图片列表 img_files shop_info[data][files] # 企业营业执照 biz_url # 经营许可证 yao_url # 质量体系调查表 qs_url if len(img_files):for i in img_files:if 营业执照 in i[typeName]:biz_url i[filePath]if 经营许可证 in i[typeName]:yao_url i[filePath]if 质量体系调查表 in i[typeName]:qs_url i[filePath]获取店铺营业执照编码 biz_code if biz_url:biz_code self.get_shop_biz_code(img_linkbiz_url)#替换插入数据库 sql freplace into yyc_shop(shop_id,shop_name,logo,shelves,is_self,biz_code,biz_url,yao_url,qs_url,official_name,province,city) values({shop_id},{shop_name},{logo},{shelves},{is_self},{biz_code},{biz_url},{yao_url},{qs_url},{official_name},{province},{city}) self.log_.info(f插入sql{sql}) self.base_.mysql_data(sql) 六、总结：Python爬虫主要分三步：请求接口、数据解析、数据存储。 版权声明：本文章版权归作者所有，未经作者允许禁止任何转载、采集，作者保留一切追究的权利。

查看全文

http://www.hkea.cn/news/14285361/