Add diyanet.gov.tr scraper
commit 602196be13
@@ -0,0 +1,2 @@
venv
.idea
@@ -0,0 +1,41 @@
from datetime import datetime
from typing import List

import httpx
from bs4 import BeautifulSoup
from pydantic import BaseModel

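# Module-level async HTTP client, reused for every request.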
_http = httpx.AsyncClient()


class PrayerTimes(BaseModel):
    date: datetime
    fajr: str
    sun: str
    dhuhr: str
    asr: str
    maghrib: str
    isha: str


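# Fetch the given page and turn each row of the prayer-times table into a PrayerTimes model.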
async def parse_prayer_times(url: str) -> List[PrayerTimes]:
    res = await _http.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')

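    # Assumes the table columns appear in the same order as the PrayerTimes fields,
    # so zipping the field names with the cell texts yields the model's keyword arguments.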
    items = []
    for row in soup.select('#tab-1 .vakit-table tbody tr'):
        cell_texts = (c.text.strip() for c in row.select('td'))
        headers = PrayerTimes.__fields__.keys()
        parsed = dict(zip(headers, cell_texts))
        parsed['date'] = datetime.strptime(parsed['date'], '%d.%m.%Y')
        items.append(PrayerTimes(**parsed))
    return items


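# Manual check: fetch the English-language page and pretty-print the parsed prayer times.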
if __name__ == '__main__':
    import asyncio
    import pprint

    loop = asyncio.get_event_loop()
    results = loop.run_until_complete(parse_prayer_times('https://namazvakitleri.diyanet.gov.tr/en-US'))
    pprint.pprint(results)