Skip to content

Commit 4e183bf

Browse files
committed
remove pandas and refactor
1 parent 20cc74c commit 4e183bf

15 files changed

+150
-184
lines changed

.tool-versions

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
python 3.9.14
1+
python 3.10.7

README.md

+14-17
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
<p align="center">
88
<a href="#about">About</a> •
9+
<a href="#features">Features</a> •
910
<a href="#usage">Usage</a> •
1011
<a href="#credits">Credits</a> •
1112
<a href="#license">License</a>
@@ -17,6 +18,12 @@
1718

1819
Working with FIDE official data is not simple, mainly because they don't have an API. That's the reason I made a simple API with FastAPI to scrape the data from their own website and provide it as JSON over HTTP requests.
1920

21+
## Features
22+
23+
- Get top players list
24+
- Get player info
25+
- Get player history
26+
2027
## Usage
2128

2229
You will need git and asdf installed, from your terminal:
@@ -26,22 +33,24 @@ git clone https://github.com/cassiofb-dev/fide-api
2633

2734
cd fide-api
2835

29-
asdf install python 3.9.14
36+
asdf install python 3.10.7
3037

31-
asdf local python 3.9.14
38+
asdf local python 3.10.7
3239

3340
python -m venv venv
3441

3542
source venv/bin/activate
3643

3744
pip install -r requirements.txt
3845

39-
uvicorn main:app --reload
46+
uvicorn src.api:app --reload
4047
```
4148

49+
To see the docs go to ``localhost:8000/docs``
50+
4251
## Credits
4352

44-
This project uses git, python (3.9.14) and asdf.
53+
This project uses git, python (3.10.7) and asdf.
4554

4655
The following python dependencies were used:
4756
```txt
@@ -50,31 +59,19 @@ beautifulsoup4==4.11.1
5059
certifi==2022.9.24
5160
charset-normalizer==2.1.1
5261
click==8.1.3
53-
dnspython==2.2.1
54-
email-validator==1.3.0
5562
fastapi==0.85.0
5663
h11==0.14.0
5764
httptools==0.5.0
5865
idna==3.4
59-
itsdangerous==2.1.2
60-
Jinja2==3.1.2
61-
MarkupSafe==2.1.1
62-
numpy==1.23.3
6366
orjson==3.8.0
64-
pandas==1.5.0
6567
pydantic==1.10.2
66-
python-dateutil==2.8.2
6768
python-dotenv==0.21.0
68-
python-multipart==0.0.5
69-
pytz==2022.4
7069
PyYAML==6.0
7170
requests==2.28.1
72-
six==1.16.0
7371
sniffio==1.3.0
7472
soupsieve==2.3.2.post1
7573
starlette==0.20.4
76-
typing_extensions==4.3.0
77-
ujson==5.5.0
74+
typing_extensions==4.4.0
7875
urllib3==1.26.12
7976
uvicorn==0.18.3
8077
uvloop==0.17.0

requirements.txt

+1-13
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,19 @@ beautifulsoup4==4.11.1
33
certifi==2022.9.24
44
charset-normalizer==2.1.1
55
click==8.1.3
6-
dnspython==2.2.1
7-
email-validator==1.3.0
86
fastapi==0.85.0
97
h11==0.14.0
108
httptools==0.5.0
119
idna==3.4
12-
itsdangerous==2.1.2
13-
Jinja2==3.1.2
14-
MarkupSafe==2.1.1
15-
numpy==1.23.3
1610
orjson==3.8.0
17-
pandas==1.5.0
1811
pydantic==1.10.2
19-
python-dateutil==2.8.2
2012
python-dotenv==0.21.0
21-
python-multipart==0.0.5
22-
pytz==2022.4
2313
PyYAML==6.0
2414
requests==2.28.1
25-
six==1.16.0
2615
sniffio==1.3.0
2716
soupsieve==2.3.2.post1
2817
starlette==0.20.4
29-
typing_extensions==4.3.0
30-
ujson==5.5.0
18+
typing_extensions==4.4.0
3119
urllib3==1.26.12
3220
uvicorn==0.18.3
3321
uvloop==0.17.0

scraper/fide_scraper.py

-58
This file was deleted.

scraper/functions/__init__.py

-3
This file was deleted.

scraper/functions/player_history.py

-42
This file was deleted.

scraper/functions/player_info.py

-24
This file was deleted.

main.py renamed to src/api.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from fastapi import FastAPI
44
from fastapi.responses import ORJSONResponse
55

6-
from scraper import fide_scraper
6+
from src.scraper import fide_scraper
77

88
app = FastAPI(default_response_class=ORJSONResponse)
99

@@ -15,15 +15,15 @@ def home():
1515

1616
@app.get("/top_players/")
1717
async def top_players(limit: int = 100, history: bool = False):
18-
response = fide_scraper.top_players(limit=limit, history=history)
18+
response = fide_scraper.get_top_players(limit=limit, history=history)
1919
return response
2020

2121
@app.get("/player_history/")
2222
async def player_history(fide_id: str):
23-
response = fide_scraper.player_history(fide_id=fide_id)
23+
response = fide_scraper.get_player_history(fide_id=fide_id)
2424
return response
2525

2626
@app.get("/player_info/")
2727
async def player_info(fide_id: str, history: bool = False):
28-
response = fide_scraper.player_info(fide_id=fide_id, history=history)
28+
response = fide_scraper.get_player_info(fide_id=fide_id, history=history)
2929
return response

src/scraper/fide_scraper.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import requests
2+
import src.scraper.functions as scraper
3+
4+
def get_top_players(limit: int = 100, history: bool = False) -> list[dict]:
    """Download the FIDE top-players page and return the parsed players.

    Args:
        limit: Upper bound of the ``[0:limit]`` slice applied to the parsed
            player list.
        history: When true, the profile page of every returned player is
            downloaded too and its parsed rating history is stored under
            the player's ``"history"`` key.

    Returns:
        The (sliced) list of player dicts produced by
        ``scraper.get_top_players``.

    Raises:
        requests.exceptions.RequestException: If a request fails, including
            ``requests.exceptions.Timeout`` when the server does not answer
            within the timeout below.
    """
    # Without an explicit timeout ``requests`` can block forever on a stalled
    # connection, which would hang the API request that triggered the scrape.
    request_timeout = 10  # seconds, applied to every request made here

    top_page = requests.get(
        "https://ratings.fide.com/top.phtml",
        timeout=request_timeout,
    )
    top_players = scraper.get_top_players(top_page.text)[0:limit]

    if not history:
        return top_players

    # One extra request per player: the history table lives on the profile page.
    for player_dict in top_players:
        profile_url = f"https://ratings.fide.com/profile/{player_dict['fide_id']}"
        profile_page = requests.get(profile_url, timeout=request_timeout)
        player_dict["history"] = scraper.get_player_history(profile_page.text)

    return top_players
27+
28+
def get_player_history(fide_id: str) -> list[dict]:
    """Download a player's FIDE profile page and return the parsed history.

    Args:
        fide_id: Identifier interpolated into the profile URL
            ``https://ratings.fide.com/profile/{fide_id}``.

    Returns:
        Whatever ``scraper.get_player_history`` extracts from the page HTML.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            ``requests.exceptions.Timeout`` when the server does not answer
            within the timeout.
    """
    profile_url = f"https://ratings.fide.com/profile/{fide_id}"

    # An explicit timeout keeps a stalled connection from blocking forever.
    response = requests.get(profile_url, timeout=10)

    return scraper.get_player_history(response.text)
38+
39+
def get_player_info(fide_id: str, history: bool = False):
    """Download a player's FIDE profile page and return the parsed info.

    Args:
        fide_id: Identifier interpolated into the profile URL
            ``https://ratings.fide.com/profile/{fide_id}``.
        history: When true, the rating history parsed from the same page is
            added to the result under the ``"history"`` key.

    Returns:
        The dict produced by ``scraper.get_player_info``, optionally extended
        with ``"history"``.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            ``requests.exceptions.Timeout`` when the server does not answer
            within the timeout.
    """
    profile_url = f"https://ratings.fide.com/profile/{fide_id}"

    # An explicit timeout keeps a stalled connection from blocking forever.
    response = requests.get(profile_url, timeout=10)
    html_doc = response.text

    player_info = scraper.get_player_info(html_doc)

    if history:
        # Info and history come from the same page, so no second request.
        player_info["history"] = scraper.get_player_history(html_doc)

    return player_info

src/scraper/functions/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from src.scraper.functions.player_history import get_player_history
2+
from src.scraper.functions.player_info import get_player_info
3+
from src.scraper.functions.top_players import get_top_players

src/scraper/functions/player_history.py

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from bs4 import BeautifulSoup
2+
3+
from src.scraper.functions.utils import fide_date_to_numeric_string
4+
5+
def get_player_history(html_doc: str):
    """Parse the rating-history table of a FIDE profile page.

    Args:
        html_doc: HTML text of the profile page.

    Returns:
        A list with one dict per table row, holding the raw ``period`` text,
        the classical/rapid/blitz rating and game counts as ints (an empty
        cell becomes ``0``) and ``date``, the period passed through
        ``fide_date_to_numeric_string``. An empty list is returned when the
        history table is not present in the document.

    Raises:
        ValueError: If a rating or games cell holds non-numeric text.
    """
    soup = BeautifulSoup(html_doc, "html.parser")

    table_selector = "body > section.container.section-profile > div.row.no-gutters > div.profile-bottom.col-lg-12 > div.profile-tab-containers > div:nth-child(3) > div > div.col-lg-12.profile-tableCont > table > tbody"

    table = soup.select_one(table_selector)

    # ``select_one`` yields None when nothing matches; report "no history"
    # instead of failing with AttributeError on ``None.find_all``.
    if table is None:
        return []

    player_history = []

    for row in table.find_all("tr"):
        # Strip non-breaking spaces used as padding in the cells.
        cells = [
            column.get_text().replace("\xa0", "").strip()
            for column in row.find_all("td")
        ]

        # A usable row has the period plus six numeric columns; anything
        # shorter cannot be indexed below, so it is skipped.
        if len(cells) < 7:
            continue

        player_history.append({
            "period": cells[0],
            "classical_rating": int(cells[1] or 0),
            "classical_games": int(cells[2] or 0),
            "rapid_rating": int(cells[3] or 0),
            "rapid_games": int(cells[4] or 0),
            "blitz_rating": int(cells[5] or 0),
            "blitz_games": int(cells[6] or 0),
            "date": fide_date_to_numeric_string(cells[0]),
        })

    return player_history

src/scraper/functions/player_info.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from bs4 import BeautifulSoup
2+
3+
# Shared selector prefixes for the FIDE profile page layout.
_PROFILE_ROOT = "body > section.container.section-profile > div.row.no-gutters"
_PROFILE_TOP = (
    f"{_PROFILE_ROOT} > div.profile-top.col-lg-12 > div"
    " > div.col-lg-9.profile-top__right > div"
)
_PROFILE_RANKS = (
    f"{_PROFILE_ROOT} > div.profile-bottom.col-lg-12"
    " > div.profile-tab-containers"
    " > div.profile-tab-container.profile-tab-container_active"
    " > div:nth-child(2)"
)


def _info_selector(block: int, row: int) -> str:
    """Build the selector for one data cell of the header info blocks."""
    return (
        f"{_PROFILE_TOP} > div.col-lg-12.profile-top-info"
        f" > div:nth-child({block}) > div:nth-child({row})"
        " > div.profile-top-info__block__row__data"
    )


def _rank_selector(table: int, row: int) -> str:
    """Build the selector for the second cell of a row in a rank table."""
    return (
        f"{_PROFILE_RANKS} > div:nth-child({table}) > table > tbody"
        f" > tr:nth-child({row}) > td:nth-child(2)"
    )


def _select_text(soup, selector: str) -> str:
    """Return the stripped text of the first match, or "" when none matches."""
    node = soup.select_one(selector)
    return node.get_text().strip() if node is not None else ""


def _select_int(soup, selector: str) -> int:
    """Return the first match's text as an int; empty or missing gives 0."""
    return int(_select_text(soup, selector) or 0)


def get_player_info(html_doc: str):
    """Parse the header fields and rank tables of a FIDE profile page.

    Args:
        html_doc: HTML text of the profile page.

    Returns:
        A dict with the text fields ``fide_id``, ``fide_title``,
        ``federation``, ``sex`` and ``name``, and the int fields
        ``birth_year`` and ``{world,continental,national}_rank_{all,active}``.
        An element that is missing from the document yields ``""`` for a text
        field and ``0`` for an int field, the same value an empty cell gives.

    Raises:
        ValueError: If a numeric field holds non-numeric text.
    """
    soup = BeautifulSoup(html_doc, "html.parser")

    player_info = {
        "fide_id": _select_text(soup, _info_selector(1, 3)),
        "fide_title": _select_text(soup, _info_selector(2, 3)),
        "federation": _select_text(soup, _info_selector(1, 2)),
        "birth_year": _select_int(soup, _info_selector(2, 1)),
        "sex": _select_text(soup, _info_selector(2, 2)),
        "name": _select_text(soup, f"{_PROFILE_TOP} > div.col-lg-8.profile-top-title"),
        # Rank tables: child 1 is read as world, 3 as continental, 2 as
        # national; row 1 is read as "all" and row 2 as "active".
        "world_rank_all": _select_int(soup, _rank_selector(1, 1)),
        "world_rank_active": _select_int(soup, _rank_selector(1, 2)),
        "continental_rank_all": _select_int(soup, _rank_selector(3, 1)),
        "continental_rank_active": _select_int(soup, _rank_selector(3, 2)),
        "national_rank_all": _select_int(soup, _rank_selector(2, 1)),
        "national_rank_active": _select_int(soup, _rank_selector(2, 2)),
    }

    return player_info

0 commit comments

Comments
 (0)