服务周到的网站建站,做信息发布类网站,毕设做购物网站容易吗,建立中英文网站行政区划获取
获取2023年的行政区划，并以 编码: 省市区 格式保存为字典，方便后续调用。注：网址可能会更新，请以最新的为准。
# 获取并保存行政区划代码
import requests
from lxml import etree
import json

def fetch_html(url): ...

并以 编码: 省市区 格式保存为字典，方便后续调用。注：网址可能会更新，请以最新的为准。
# 获取并保存行政区划代码
import requests
from lxml import etree
import json


def fetch_html(url, timeout=10):
    """Download a page and return its text decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The response body as a string, or None when the request fails
        (the failure is reported on stdout).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        response.encoding = "utf-8"
        return response.text
    except requests.RequestException as e:
        print(f"请求失败: {e}")
        return None


def build_location_dict(records):
    """Turn (code, name) pairs into a code -> province/city/district mapping.

    Parents are resolved by code prefix (first two digits for the province,
    first four for the city) instead of "the last city seen". Entries that
    have no city-level row of their own -- districts of municipalities and
    counties administered directly by a province -- therefore get an empty
    city rather than inheriting an unrelated one from a preceding row.

    Args:
        records: Iterable of (code, name) string pairs, parents listed
            before their children.

    Returns:
        Dict mapping each code to a dict with the keys 'province', 'city'
        and 'district' (unused levels are empty strings).
    """
    provinces = {}
    cities = {}
    location_dict = {}
    for code, name in records:
        if code.endswith("0000"):  # province level
            provinces[code[:2]] = name
            entry = {"province": name, "city": "", "district": ""}
        elif code.endswith("00"):  # city level
            cities[code[:4]] = name
            entry = {
                "province": provinces.get(code[:2], ""),
                "city": name,
                "district": "",
            }
        else:  # district / county level
            entry = {
                "province": provinces.get(code[:2], ""),
                "city": cities.get(code[:4], ""),
                "district": name,
            }
        location_dict[code] = entry
    return location_dict


def parse_html(html):
    """Parse the division-code table out of the page HTML.

    Args:
        html: Page source as a string; may be None or empty.

    Returns:
        The mapping produced by build_location_dict, or None when there is
        nothing to parse.
    """
    if not html:
        return None
    html_tree = etree.HTML(html)
    if html_tree is None:
        return None
    # Data rows are the siblings following the header row, which is located
    # through the cell whose text is the column title.
    rows = html_tree.xpath('//*[text()="行政区划代码"]/../following-sibling::tr')

    records = []
    for row in rows:
        code_cells = row.xpath("./td[2]/text()")
        name_cells = row.xpath("./td[3]/text()")
        # Skip spacer rows and rows lacking a name cell instead of raising
        # IndexError on them.
        if not code_cells or not name_cells:
            continue
        code = code_cells[0].strip()
        name = name_cells[0].replace("*", "").strip()
        if code and name:
            records.append((code, name))
    return build_location_dict(records)


def generate_full_address(location_dict):
    """Flatten each entry into a single space-separated address string.

    Empty levels are dropped, so a missing city does not leave a double
    space between province and district.

    Args:
        location_dict: Mapping of code to a dict with the keys 'province',
            'city' and 'district'.

    Returns:
        Dict mapping each code to its address string.
    """
    return {
        code: " ".join(
            part
            for part in (
                location["province"],
                location["city"],
                location["district"],
            )
            if part
        )
        for code, location in location_dict.items()
    }


def save_to_json(data, filename):
    """Write data to filename as indented UTF-8 JSON, reporting the outcome.

    Args:
        data: JSON-serialisable object.
        filename: Destination path.
    """
    try:
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"数据已成功保存到 {filename}")
    except IOError as e:
        print(f"文件保存失败: {e}")


def main():
    """Fetch the 2023 division-code page and save the code -> address map."""
    url = "https://www.mca.gov.cn/mzsj/xzqh/2023/202301xzqh.html"
    html = fetch_html(url)
    if not html:
        return
    location_dict = parse_html(html)
    if not location_dict:
        return
    finally_location_dict = generate_full_address(location_dict)
    save_to_json(finally_location_dict, "city_and_code.json")


if __name__ == "__main__":
    main()

# Example: look up the province / city / district string for a code
# with open(rcity_and_code.json, r, encodingutf-8) as f:
# dict_ json.load(f)
# print(dict_[320507])