#1 netkeibaのスクレイピングに挑んでみた
2021/02/02
クソコードなので、修正点があればコメントで教えてください。当方の環境では動作していますが、動作しない場合は自力で解決してください。
netkeibaのプレミアム会員登録が必要になります。なおこのスクリプトを利用して何らかの不利益が利用者に生じた場合でも、
当方は、一切責任を負いません。利用される方はこれに同意していただいたものとみなします。
環境
macOS Catalina(10.15.7), Python3.9.0, SQLite
import requests
from bs4 import BeautifulSoup
import lxml.html
import pandas as pd
import pandas.io.sql as psql
from pandas_datareader import DataReader
import urllib.parse
import re
import itertools
import sqlite3
import json
from bikou import bikou
import math
import time
import datetime
from tqdm import tqdm
import traceback
import sys
# Show the start time (also used at the end of the script to compute the elapsed time)
start = datetime.datetime.now()
print(start)
# Formatting helper for race IDs
def numStr(num):
    """Return ``num`` as a string zero-padded to at least two characters.

    Used to build the two-digit fields (venue, meeting, day, race number)
    of a race ID.  Values that already have two or more digits are
    returned unchanged.
    """
    return str(num).zfill(2)
# Payout bet types in the order their rows are read from the payout tables
# when a bracket-quinella (wakuren) row is present.  Each entry is
# (result name, key under which the winning numbers are stored, separator
# between winning combinations).  The separator is None for the single-horse
# bets, whose winning numbers each sit in their own cell.
_BET_TYPES = (
    ('tansho', 'umaban', None),
    ('fukusho', 'umaban', None),
    ('wakuren', 'wakuban', ',,,'),
    ('umaren', 'umaban', ',,,'),
    ('wide', 'umaban', ',,,'),
    ('umatan', 'umaban', ',,,'),
    ('sanfuku', 'umaban', ',,'),
    ('santan', 'umaban', ',,'),
)

# Marks that may be prefixed to a jockey name in the result table.
_JOCKEY_MARKS = ('▲', '△', '☆', '★', '◇')


def _append_rows(frame, table, con):
    """Append ``frame`` to ``table``, skipping it on ``sqlite3.IntegrityError``."""
    try:
        frame.to_sql(table, con, if_exists='append', index=False)
    except sqlite3.IntegrityError:
        # Rows that collide with already-stored ones are deliberately skipped.
        pass


def _payout_rows(soup):
    """Return the payout table rows as lists of cleaned cell strings."""
    rows = []
    for table in soup.find_all('table', {'class': 'Payout_Detail_Table'}):
        for tr in table.find_all("tr"):
            cells = [cell.text for cell in tr.find_all(["td", "th", 'div'])]
            # Drop empty cells.
            rows.append([text for text in cells if text != ''])
    del rows[0][1]  # remove the unneeded element of the win (tansho) row
    del rows[1][1]  # remove the unneeded element of the place (fukusho) row
    # Replace line breaks and the popularity suffix with commas.
    return [
        [text.strip('\n').replace('\n', ',').replace('人気', ',').strip(',')
         for text in row]
        for row in rows
    ]


def _parse_payouts(refundlists):
    """Convert cleaned payout rows into one dict per bet type.

    Eight rows are read as including the bracket quinella (wakuren) row and
    seven rows as lacking it; for any other row count every bet type stays
    empty.  Each bet type maps ``'kumi1'``, ``'kumi2'``, ... to a dict holding
    the winning numbers, the price and the popularity rank.
    """
    payouts = {name: {} for name, _, _ in _BET_TYPES}
    if len(refundlists) == 8:
        bet_types = _BET_TYPES
    elif len(refundlists) == 7:
        bet_types = [bet for bet in _BET_TYPES if bet[0] != 'wakuren']
    else:
        return payouts

    for row, (name, number_key, separator) in zip(refundlists, bet_types):
        # One '円' appears per paid-out combination.
        kumiSu = "".join(row).count('円')
        for kumi in range(kumiSu):
            if separator is None:
                # Single-horse bets: one cell per winning horse, followed by
                # the price cell and the popularity cell.
                numbers = int(row[kumi + 1])
                price_cell = row[kumiSu + 1]
                ninki_cell = row[kumiSu + 2]
            else:
                # Combination bets: all winning combinations share one cell.
                numbers = row[1].split(separator)[kumi].replace(',', '-')
                price_cell = row[2]
                ninki_cell = row[3]
            payouts[name][f'kumi{kumi + 1}'] = {
                number_key: numbers,
                'price': int(price_cell.split('円')[kumi].replace(',', '')),
                'ninki': int(ninki_cell.split(',')[kumi]),
            }
    return payouts


def _race_rap_json(tables, baba):
    """Return the lap times as a JSON string, or ``''`` when unavailable.

    The JSON object holds the per-section laps under ``'rap'`` and the
    running totals (floored to one decimal place) under ``'sekiRap'``.
    Steeplechase races (``baba == '障'``) have no lap data.
    """
    if baba == '障':
        return ''
    try:
        raw = tables[5][1:].to_json(orient='records')
    except (IndexError, KeyError):
        # Fall back to the fourth table when the sixth one is not available.
        raw = tables[3][1:].to_json(orient='records')
    raw = raw.replace('[', '').replace(']', '')
    try:
        laps = json.loads(raw)
    except json.decoder.JSONDecodeError:
        return ''
    # Lap values that arrive as quoted strings are converted to numbers.
    for key, value in laps.items():
        if isinstance(value, str):
            laps[key] = float(value)
    cumulative = [math.floor(total * 10) / 10
                  for total in itertools.accumulate(laps.values())]
    return json.dumps({'rap': laps, 'sekiRap': dict(zip(laps, cumulative))})


def _scrape_race(raceId):
    """Scrape one race result page and store it in ``horse.db``.

    Returns ``False`` when the page has no race name, no result table, or
    no remarks data (the caller then stops scanning the remaining race
    numbers of that day), and ``True`` once the race has been written.
    """
    url = 'https://race.netkeiba.com/race/result.html?race_id=' + raceId
    html = requests.get(url, timeout=30)
    soup = BeautifulSoup(html.content, 'html.parser')

    # Race name; a page without one is treated as "no such race".
    raceNameTag = soup.find(class_="RaceName")
    if raceNameTag is None:
        return False
    raceName = raceNameTag.text.strip()

    # Race date, taken from the digits in the refund link.  Reset per race so
    # that a page without the link does not inherit the previous race's date.
    raceDateStr = None
    try:
        href = soup.find(class_="Refundlink").a.get('href')
        raceDate = re.search(r'\d+', href).group()
        raceDateStr = f'{raceDate[0:4]}-{raceDate[4:6]}-{raceDate[6:]}'
    except AttributeError:
        # No refund link or no digits in it: leave the date empty.
        pass

    # Course type (turf / dirt / steeplechase), distance and weather.
    raceData = soup.find(class_="RaceData01")
    baba_distance = raceData.span.text.strip()
    baba = baba_distance[0]
    weatherPos = raceData.text.find('天候:') + 3
    weather = raceData.text[weatherPos:weatherPos + 1]
    distance = baba_distance.replace(baba, '').replace('m', '')

    # Venue, grade and weight condition.
    raceDataList = soup.find(class_="RaceData02").text.splitlines()
    place = raceDataList[2]
    grade = raceDataList[5]
    kinryoCondition = raceDataList[8]

    # Track condition; None when neither element exists (previously the value
    # of the preceding race leaked through).
    conditionTag = soup.find(class_="Item04") or soup.find(class_="Item03")
    babaCondition = conditionTag.text[5:] if conditionTag else None

    # Horse and jockey IDs: every digit found in the element's markup.
    horseIdList = [re.sub(r"\D", "", str(tag))
                   for tag in soup.select("span.Horse_Name")]
    jockeyIdList = [re.sub(r"\D", "", str(tag))
                    for tag in soup.select("td.Jockey")]
    # Extra jockey cells may be picked up, so trim to the number of horses.
    jockeyIdList = jockeyIdList[0:len(horseIdList)]

    # Result tables.
    try:
        tables = pd.read_html(url, header=0)
    except ValueError:
        return False

    # Remarks and time index from the separate module; None means no data.
    remarks = bikou(raceId)
    if remarks is None:
        return False
    bikouList, timeIndex = remarks
    # NOTE: when the time index is updated later, the DB needs an UPDATE.
    if baba != '障' and timeIndex[0] != '':
        timeIndex = [int(index) for index in timeIndex]

    # Rename the columns to their physical names.
    results = tables[0].rename(columns={
        '着順': 'Result_Num', '枠': 'Waku', '馬番': 'Num',
        '馬名': 'Horse_Name', '性齢': 'Age', '斤量': 'Kinryo',
        '騎手': 'Jockey', 'タイム': 'Time', '着差': 'CHAKUSA',
        '人気': 'NINKI', '単勝オッズ': 'Odds', '後3F': 'A3F',
        'コーナー通過順': 'TukaRank', '厩舎': 'Kyusya',
        '馬体重(増減)': 'Weight'})

    # Strip the mark that may precede a jockey name.
    jockeyList = [
        jockey[1:] if any(mark in jockey for mark in _JOCKEY_MARKS) else jockey
        for jockey in results['Jockey']
    ]
    # Split "sex + age".
    sexList = [sexAge[0:1] for sexAge in results['Age'].values]
    ageList = [sexAge[1:] for sexAge in results['Age'].values]
    # Split "weight(change)"; a value without parentheses is kept whole and
    # gets an empty change instead of losing its last character.
    weightParts = [str(weight).partition('(') for weight in results['Weight']]
    weightList = [body for body, _, _ in weightParts]
    weightChageList = [rest.partition(')')[0] for _, _, rest in weightParts]

    results = results.drop('Age', axis=1)
    results['Jockey'] = jockeyList
    results['Weight'] = weightList
    results['WeightChange'] = weightChageList
    results['sex'] = sexList
    results['age'] = ageList
    results['timeIndex'] = timeIndex
    results['bikou'] = bikouList
    results['raceId'] = int(raceId)
    results['horseID'] = horseIdList
    results['jockeyID'] = jockeyIdList
    results['raceName'] = raceName
    results['baba'] = baba
    results['distance'] = distance
    results['babacondition'] = babaCondition
    results['place'] = place
    results['grade'] = grade
    results['kinryoCondition'] = kinryoCondition
    results['raceDate'] = raceDateStr

    raceRap = _race_rap_json(tables, baba)
    payouts = _parse_payouts(_payout_rows(soup))

    # One-row frame describing the race itself; dicts are stored as JSON text.
    df_raceInfo = pd.DataFrame(
        [[raceId, raceName, raceDateStr, weather, place, baba, grade,
          distance, babaCondition, kinryoCondition, raceRap,
          json.dumps(payouts['tansho']), json.dumps(payouts['fukusho']),
          json.dumps(payouts['wakuren']), json.dumps(payouts['umaren']),
          json.dumps(payouts['wide']), json.dumps(payouts['umatan']),
          json.dumps(payouts['sanfuku']), json.dumps(payouts['santan'])]],
        columns=['raceId', 'raceName', 'raceDate', 'weather', 'place',
                 'baba', 'grade', 'distance', 'babaCondition',
                 'kinryoCondition', 'raceRap', 'tansho', 'fukusho',
                 'wakuren', 'umaren', 'wide', 'umatan', 'sanfuku', 'santan'])
    # Master frames for jockeys and horses.
    df_jockey = results.loc[:, ['jockeyID', 'Jockey']]
    df_horse = results.loc[:, ['horseID', 'Horse_Name']]

    con = sqlite3.connect('horse.db')
    try:
        _append_rows(results, 'race_results', con)
        _append_rows(df_raceInfo, 'race_infos', con)
        # Master tables are filled row by row so that one already-known
        # horse or jockey does not block the others.
        for i in range(len(df_horse)):
            _append_rows(df_horse.iloc[i:i + 1], 'horse', con)
        for i in range(len(df_jockey)):
            _append_rows(df_jockey.iloc[i:i + 1], 'jockey', con)
        con.commit()
    finally:
        # Always release the connection, even when an insert fails.
        con.close()
    return True


try:
    # Year
    for year in range(2020, 2021):
        # Racecourse code
        for placeCode in range(1, 11):
            # Meeting number
            for kaisai in range(1, 11):
                # Day of the meeting.  Check every year whether any meeting
                # ran to day 13-15 and widen the range if so.
                for nitime in range(1, 13):
                    # Race number
                    for raceNum in range(1, 13):
                        raceId = str(year) + numStr(placeCode) + \
                            numStr(kaisai) + numStr(nitime) + \
                            numStr(raceNum)
                        print('\n' + raceId)
                        time.sleep(1)  # scrape at one-second intervals
                        if not _scrape_race(raceId):
                            break
except Exception as e:
    print(e)
    print(raceId)
    print(traceback.format_exc())
# Show the finish time
end = datetime.datetime.now()
print(end)
# Show the elapsed time.  The same timestamp is used for both lines, `start`
# is left untouched, and the duration is converted to seconds so that it
# matches the "[sec]" label.
elapsed_time = end - start
print("elapsed_time:{0}".format(elapsed_time.total_seconds()) + "[sec]")
import requests
import csv
import urllib
from bs4 import BeautifulSoup
from urllib.parse import urljoin
def bikou(raceID):
    """Fetch the remarks and time-index columns of one race.

    Logs in with the credentials below, downloads the database page for
    ``raceID`` and reads the race detail table from it.

    Args:
        raceID: Race ID string; it is appended to the database page URL.

    Returns:
        A ``(remarks, time_index)`` tuple of two lists of strings, one entry
        per table row after the first (the first row is presumably the
        header), or ``None`` when the page does not contain the table.
    """
    Base = 'https://db.sp.netkeiba.com/race/'
    url = Base + raceID
    # E-mail address and password used to log in
    USER = "******"
    PASS = "******"
    login_info = {
        "login_id": USER,
        "pswd": PASS,
    }
    url_login = "https://regist.netkeiba.com/account/?pid=login&action=auth"
    # The session is closed on exit so that repeated calls do not leak
    # connections; the timeouts keep a stalled server from hanging the run.
    with requests.session() as session:
        session.post(url_login, data=login_info, timeout=30)
        res = session.get(url, timeout=30)
    soup = BeautifulSoup(res.content, "html.parser")
    table = soup.find(
        'table', {'class': 'table_slide_body ResultsByRaceDetail'})
    if table is None:
        return None
    remarks = []
    time_index = []
    # Skip the first row; the remaining rows are read cell by cell.
    for row in table.find_all("tr")[1:]:
        cells = [cell.text for cell in row.find_all(["td", "th"])]
        # Cell 17 is used as the remark and cell 9 as the time index.
        remarks.append(cells[17].replace('\n', ''))
        time_index.append(cells[9].replace('\n', ''))
    return remarks, time_index
中央競馬ランキング
↑↑↑