省市区的行政区域数据2021(国家统计局)
前言
本文提供获取最新省、市、区县字典数据的代码。免责声明:因爬取数据造成的任何问题,作者概不负责;本文仅用于技术分享和学习。
一、python
import time
from urllib.parse import urljoin

import pymysql
import requests
from bs4 import BeautifulSoup


class Administrative(object):
    """Scrape province / city / county division data and store it in MySQL.

    Instantiating the class opens the MySQL connection, runs the whole crawl
    via :meth:`main`, and closes the connection afterwards.
    """

    def __init__(self):
        self.db = pymysql.connect(host="127.0.0.1", port=3306, database="county_db",
                                  charset="utf8mb4", user="root", password="admin")
        # Close the connection even if the crawl aborts half-way.
        try:
            self.main()
        finally:
            self.db.close()

    def main(self):
        """Walk province -> city -> county pages and insert one row per county.

        Rows are collected per row of the province index table and written
        with a single batched insert after that row has been processed.
        """
        base_url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/"
        sql = "insert into china (province_name,city_code,city_name,county_code,county_name) values (%s,%s,%s,%s,%s)"

        # Each row of the province index table holds several provinces.
        for province_row in self.get_response(base_url, "provincetr"):
            datas = []
            # Use find_all("td") rather than iterating the row's children:
            # child iteration also yields whitespace text nodes, which have
            # no ``.a`` attribute.
            for province_cell in province_row.find_all("td"):
                province_link = province_cell.a
                if province_link is None:
                    # Padding cell without a province in it.
                    continue
                province_name = province_link.get_text()
                province_url = urljoin(base_url, province_link.get("href"))
                print(province_name)

                # The first row of the table is skipped (as in the original
                # code) -- presumably a header row; verify against the page.
                for city_row in self.get_response(province_url, None)[1:]:
                    city_cells = city_row.find_all("td")
                    city_code = city_cells[0].string
                    city_name = city_cells[1].string
                    city_link = city_cells[1].a
                    if city_link is None:
                        # No sub-page to follow, hence no counties to record.
                        continue
                    # The href is relative to the page it was found on.
                    city_url = urljoin(province_url, city_link.get("href"))

                    for county_row in self.get_response(city_url, None)[1:]:
                        county_cells = county_row.find_all("td")
                        county_code = county_cells[0].string
                        county_name = county_cells[1].string
                        data = [province_name, city_code, city_name, county_code, county_name]
                        print(data)
                        datas.append(data)
                    # Throttle: pause one second after each city page.
                    time.sleep(1)

            # An empty batch would otherwise reach cursor.execute(sql) with
            # unbound %s placeholders and fail with a SQL syntax error.
            if datas:
                self.connect_mysql(sql, datas)

    def get_response(self, url, attr):
        """Download *url* and return the ``<tr>`` rows of its data table.

        Args:
            url: Page to fetch.
            attr: CSS class the rows must carry; a falsy value returns every
                row of the table.

        Returns:
            The matching ``<tr>`` tags as returned by ``find_all``.

        Raises:
            requests.RequestException: on network failure, timeout, or a
                non-2xx HTTP status.
        """
        # Without a timeout a stalled connection would hang the crawl forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        response.encoding = "utf-8"  # force the decoding used for response.text
        soup = BeautifulSoup(response.text, "lxml")
        # NOTE(review): this navigation path is tied to the site's nested
        # table layout; confirm it against the live markup if it breaks.
        table = soup.find_all("tbody")[1].tbody.tbody.table
        if attr:
            return table.find_all("tr", attrs={"class": attr})
        return table.find_all("tr")

    def connect_mysql(self, sql, data):
        """Execute *sql* on the shared connection and commit on success.

        Args:
            sql: Statement to run, using ``%s`` placeholders for parameters.
            data: Parameters. A sequence whose first element is a list or
                tuple is treated as a batch (``executemany``); any other
                truthy value is passed to a single ``execute``; a falsy value
                runs *sql* without parameters and fetches the result rows.

        Returns:
            The fetched rows when *data* is falsy, otherwise ``None``. Also
            ``None`` when a database error occurred; the error is printed and
            the transaction rolled back.
        """
        result = None
        cursor = self.db.cursor()
        try:
            if data:
                if isinstance(data[0], (list, tuple)):
                    cursor.executemany(sql, data)
                else:
                    cursor.execute(sql, data)
            else:
                cursor.execute(sql)
                result = cursor.fetchall()
            # Commit only when the statement succeeded.
            self.db.commit()
        except pymysql.MySQLError as e:
            print(e)
            self.db.rollback()
        finally:
            cursor.close()
        # Returned outside ``finally`` so that unexpected exceptions are not
        # silently discarded.
        return result


if __name__ == "__main__":
    Administrative()
二、数据表SQL
-- Target table for the scraper's insert statement: one row per county,
-- carrying the names and codes of its parent city and province.
CREATE TABLE `china` (
  `province_name` varchar(200) NOT NULL,
  `city_code`     varchar(32)  CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
  `city_name`     varchar(200) NOT NULL,
  `county_code`   varchar(32)  NOT NULL,
  `county_name`   varchar(32)  CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL,
  -- A row is identified by the pair of city code and county code.
  PRIMARY KEY (`city_code`, `county_code`)
) ENGINE=InnoDB
  DEFAULT CHARSET=utf8mb4
  COLLATE=utf8mb4_0900_ai_ci;
总结
亲测可用。只需修改 base_url 中的年份,即可获取对应年份的最新数据!