当前位置：首页 > news >正文

西安网站设计建设公司交通网站排名优化

news 2026/4/8 9:39:11

西安网站设计建设公司交通,网站排名优化,游侠相册网页设计作业,建筑培训网成绩查询基于不依赖wind、某花顺等第三方平台数据的考虑，尝试直接从财报中解析三大报表进而计算ROE等财务指标，因此需要下载沪深两市的上市公司财报数据，便于后续从pdf中解析三大报表。深市爬虫好做，先放深市爬虫： 根据时间段…

基于不依赖wind、某花顺等第三方平台数据的考虑，尝试直接从财报中解析三大报表进而计算ROE等财务指标，因此需要下载沪深两市的上市公司财报数据，便于后续从pdf中解析三大报表。
深市爬虫好做，先放深市爬虫：

'''
根据时间段下载深交所上市公司财报
path str 指定财报存储路径
time list 财报年度列表（元素为int） 如[2023,2024]
stock_list list 下载财报的股票代码列表 例如['000001','000002']
financial_statements_type list 财报的类别 例如['annual','semi-annual','quarterly_1','quarterly_3'] 分别为年报、半年报、一季报、三季报
'''
def get_financial_statements(path, time, stock_list, financial_statements_type):
    """Download Shenzhen Stock Exchange (SZSE) periodic reports as PDF files.

    For every combination of stock, year and report type, the SZSE
    announcement-list endpoint is queried (first page only, up to 50
    entries) and every returned attachment whose title does not contain
    "报告摘要" (report summary) is saved to
    ``<path>/<stock>/<year>/<year><title>##<announcement title>.pdf``.
    Files that already exist are not downloaded again.

    Args:
        path: Base directory for the downloaded reports. Missing
            directories are created; a trailing slash is optional.
        time: Iterable of report years, e.g. ``[2023, 2024]``. Each item
            is converted with ``int()``.
        stock_list: Iterable of stock codes as strings,
            e.g. ``['000001', '000002']``.
        financial_statements_type: Iterable of report kinds:
            ``'annual'``, ``'semi-annual'``, ``'quarterly_1'``; any other
            value is treated as the third-quarter report
            (``'quarterly_3'``).

    Returns:
        None. Progress and warnings are printed to stdout.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError / KeyError: If the query response is not the expected
            JSON document with a ``"data"`` key.
    """
    url = "https://www.szse.cn/api/disc/announcement/annList"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Content-Type': 'application/json',
        'Connection': 'close',
    }
    download_url = "https://disc.static.szse.cn/download"
    request_timeout = 30  # seconds; without it a stalled connection hangs forever

    # fs_type -> (file title, SZSE category code, start "-MM-DD",
    #             end "-MM-DD", offset added to the year of the end date)
    report_specs = {
        # The annual window runs well into the following year so that
        # later corrections of the report are still covered.
        'annual': ("年报", '010301', "-12-31", "-09-01", 1),
        'semi-annual': ("中报", '010303', "-07-01", "-12-31", 0),
        'quarterly_1': ("一季报", '010305', "-04-01", "-07-31", 0),
    }
    # Any unrecognised type falls back to the third-quarter report.
    fallback_spec = ("三季报", '010307', "-10-01", "-12-31", 0)

    for stock in stock_list:
        for year in time:
            year = int(year)  # accept 2023 as well as "2023"
            for fs_type in financial_statements_type:
                title, bigCategoryId, start_suffix, end_suffix, end_offset = \
                    report_specs.get(fs_type, fallback_spec)
                timestart = str(year) + start_suffix
                timeend = str(year + end_offset) + end_suffix

                data = {
                    "seDate": [timestart, timeend],
                    "stock": [stock],
                    "channelCode": ["listedNotice_disc"],
                    "bigCategoryId": [bigCategoryId],
                    "pageSize": 50,
                    "pageNum": 1,
                }
                response = requests.post(url=url, data=json.dumps(data),
                                         headers=headers, timeout=request_timeout)
                entries = json.loads(response.text)["data"]
                # Covers both None and an empty list without calling len(None).
                if not entries:
                    print("警告:股票代码:"+stock+" "+str(year)+title+"不存在!")
                    continue

                for entry in entries:
                    # Skip report summaries; only full reports are kept.
                    if "报告摘要" in entry['title']:
                        continue

                    # Create <path>/<stock>/<year> (and path itself) if missing.
                    file_path = os.path.join(path, stock, str(year))
                    os.makedirs(file_path, exist_ok=True)
                    file = os.path.join(
                        file_path,
                        str(year) + title + "##" + entry['title'].replace("*", "") + ".pdf",
                    )

                    if os.path.exists(file):
                        print("警告:股票代码:" + stock + " " + str(year) + title + "已存在!")
                        continue

                    filecontent = requests.get(download_url + entry["attachPath"],
                                               timeout=request_timeout)
                    # Never store an error page as a PDF: it would be
                    # skipped as "already exists" on every later run.
                    if filecontent.status_code != 200:
                        print("警告:股票代码:" + stock + " " + str(year) + title + "下载失败!")
                        continue
                    with open(file, "wb") as pdf:
                        pdf.write(filecontent.content)
                    print("股票代码:" + stock + " " + str(year) + title + "写入成功。")


# Example crawler invocation:
# timestart = [2023,2024]
# stock_list = ['000001','000002']
# financial_statements_type = ['annual', 'semi-annual', 'quarterly_1', 'quarterly_3']
# SZ_financial_statement_path = "F:/data/SZ/"
# get_financial_statements(SZ_financial_statement_path, timestart,stock_list,financial_statements_type)

沪市爬虫：

'''
根据时间段下载上交所上市公司财报
path str 指定财报存储路径
time list 财报年度列表（元素为int） 如[2023,2024]
stock_list list 下载财报的股票代码列表 例如['600000','600011']
financial_statements_type list 财报的类别 例如['annual','semi-annual','quarterly_1','quarterly_3'] 分别为年报、半年报、一季报、三季报
'''
def get_financial_statements(path, time, stock_list, financial_statements_type):
    """Download Shanghai Stock Exchange (SSE) periodic reports as PDF files.

    For every combination of stock, year and report type, the SSE company
    bulletin query endpoint is called (first page only, up to 50 entries)
    and every returned document whose title does not contain "摘要"
    (summary) is saved to
    ``<path>/<stock>/<year>/<year><title>##<bulletin title>.pdf``.
    Files that already exist are not downloaded again.

    Args:
        path: Base directory for the downloaded reports. Missing
            directories are created; a trailing slash is optional.
        time: Iterable of report years, e.g. ``[2023, 2024]``. Each item
            is converted with ``int()``.
        stock_list: Iterable of stock codes as strings, e.g. ``['600011']``.
        financial_statements_type: Iterable of report kinds:
            ``'annual'``, ``'semi-annual'``, ``'quarterly_1'``; any other
            value is treated as the third-quarter report
            (``'quarterly_3'``).

    Returns:
        None. Progress and warnings are printed to stdout.

    Raises:
        requests.RequestException: On network failure or timeout.
        IndexError / ValueError / KeyError: If the query response does not
            contain the expected ``pageHelp`` JSON fragment.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Referer': 'https://www.sse.com.cn/',
        'Connection': 'close',
    }
    download_url = "https://www.sse.com.cn"
    request_timeout = 30  # seconds; without it a stalled connection hangs forever

    # fs_type -> (file title, SSE reportType code, start "-MM-DD",
    #             end "-MM-DD", offset added to the year of the end date)
    # The 'QUATERn' spelling is kept exactly as the query string needs it.
    report_specs = {
        # The annual window runs well into the following year so that
        # later corrections of the report are still covered.
        'annual': ("年报", 'YEARLY', "-12-31", "-09-01", 1),
        'semi-annual': ("中报", 'QUATER2', "-07-01", "-12-31", 0),
        'quarterly_1': ("一季报", 'QUATER1', "-04-01", "-07-31", 0),
    }
    # Any unrecognised type falls back to the third-quarter report.
    fallback_spec = ("三季报", 'QUATER3', "-10-01", "-12-31", 0)

    for stock in stock_list:
        for year in time:
            year = int(year)  # accept 2023 as well as "2023"
            for fs_type in financial_statements_type:
                title, bigCategoryId, start_suffix, end_suffix, end_offset = \
                    report_specs.get(fs_type, fallback_spec)
                timestart = str(year) + start_suffix
                timeend = str(year + end_offset) + end_suffix

                # The endpoint answers in JSONP; a random callback name is
                # sent with each request.
                url = (
                    "https://query.sse.com.cn/security/stock/queryCompanyBulletin.do?jsonCallBack=jsonpCallback"
                    + str(random.randint(10000, 999999))
                    + "&isPagination=true&pageHelp.pageSize=50&pageHelp.pageNo=1&pageHelp.beginPage=1&pageHelp.cacheSize=1&pageHelp.endPage=1&productId="
                    + stock
                    + "&securityType=0101%2C120100%2C020100%2C020200%2C120200&reportType2=DQBG&reportType="
                    + bigCategoryId
                    + "&beginDate=" + timestart
                    + "&endDate=" + timeend
                )
                response = requests.get(url=url, headers=headers, timeout=request_timeout)
                # Cut the "pageHelp" object out of the JSONP payload and
                # read its "data" list.
                page_help = response.text.split('"keyWord":null,"pageHelp":')[1].split(',"productId":')[0]
                datas = json.loads(page_help)['data']
                # Covers both None and an empty list without calling len(None).
                if not datas:
                    print("警告:股票代码:" + stock + " " + str(year) + title + "不存在!")
                    continue

                for entry in datas:
                    # Skip summaries; only full reports are kept.
                    if "摘要" in entry['TITLE']:
                        continue

                    # Create <path>/<stock>/<year> (and path itself) if missing.
                    file_path = os.path.join(path, stock, str(year))
                    os.makedirs(file_path, exist_ok=True)
                    file = os.path.join(
                        file_path,
                        str(year) + title + "##" + entry['TITLE'].replace("*", "") + ".pdf",
                    )

                    if os.path.exists(file):
                        print("警告:股票代码:" + stock + " " + str(year) + title + "已存在!")
                        continue

                    filecontent = requests.get(download_url + entry["URL"],
                                               timeout=request_timeout)
                    # Never store an error page as a PDF: it would be
                    # skipped as "already exists" on every later run.
                    if filecontent.status_code != 200:
                        print("警告:股票代码:" + stock + " " + str(year) + title + "下载失败!")
                        continue
                    with open(file, "wb") as pdf:
                        pdf.write(filecontent.content)
                    print("股票代码:" + stock + " " + str(year) + title + "写入成功。")
# timestart = [2023]
# stock_list = ['600011']
# financial_statements_type = ['annual', 'semi-annual', 'quarterly_1', 'quarterly_3']
# SZ_financial_statement_path = "F:/data/SH/"
# get_financial_statements(SZ_financial_statement_path, timestart,stock_list,financial_statements_type)