Add diyanet.gov.tr scraper

7 years ago · 602196be13
commit 602196be13
5 changed files with 43 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 venv
 .idea
--- a/app/__init__.py
+++ b/app/__init__.py
--- a/app/app.py
+++ b/app/app.py
--- a/core/__init__.py
+++ b/core/__init__.py
--- a/core/diyanet.py
+++ b/core/diyanet.py
@@ -0,0 +1,41 @@
 from datetime import datetime
 from typing import List
 import httpx
 from bs4 import BeautifulSoup
 from pydantic import BaseModel
 # Async HTTP client created once at import time and used by
 # parse_prayer_times for its page request.
 # NOTE(review): nothing in this module closes the client explicitly.
 _http = httpx.AsyncClient()
 class PrayerTimes(BaseModel):
    """Prayer times for one date, built from a single table row.

    Field order is significant: parse_prayer_times pairs these field names
    positionally with the <td> cells of each row, so reordering, inserting
    or removing a field changes which column ends up in which field.
    """
    # Parsed by parse_prayer_times from a 'DD.MM.YYYY' cell.
    date: datetime
    # The remaining fields hold the stripped cell text as-is; the model only
    # requires them to be strings (presumably clock times -- confirm against
    # the page).
    fajr: str
    # Presumably sunrise -- confirm against the table header.
    sun: str
    dhuhr: str
    asr: str
    maghrib: str
    isha: str
 async def parse_prayer_times(url: str) -> List[PrayerTimes]:
    """Download a prayer-times page and parse its table into PrayerTimes rows.

    Every <tr> matched by '#tab-1 .vakit-table tbody tr' becomes one
    PrayerTimes. The stripped texts of the row's <td> cells are paired
    positionally with the model's field names, and the value paired with
    'date' is parsed as DD.MM.YYYY.

    Args:
        url: Address of the page to download.

    Returns:
        One PrayerTimes per matched row, in the order the rows are matched;
        an empty list when the selector matches nothing.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        KeyError: If a matched row contains no <td> cells.
        ValueError: If the date cell is not in DD.MM.YYYY form, or the row
            does not satisfy the PrayerTimes model.
    """
    res = await _http.get(url)
    # Fail loudly on an error response instead of parsing the error page,
    # which would otherwise look like "no prayer times published".
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    # The field names are identical for every row, so look them up once.
    field_names = tuple(PrayerTimes.__fields__)
    items = []
    for row in soup.select('#tab-1 .vakit-table tbody tr'):
        cell_texts = (cell.text.strip() for cell in row.select('td'))
        # zip() stops at the shorter side, so surplus cells are ignored.
        parsed = dict(zip(field_names, cell_texts))
        parsed['date'] = datetime.strptime(parsed['date'], '%d.%m.%Y')
        items.append(PrayerTimes(**parsed))
    return items
 if __name__ == '__main__':
    # Manual check: fetch the en-US Diyanet page and pretty-print the result.
    import asyncio
    import pprint
    # asyncio.get_event_loop() is deprecated as a way to implicitly create a
    # loop and raises RuntimeError on recent Python versions when none is
    # set. Creating and registering the loop explicitly has the same effect
    # as the old implicit path and works on every version.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    results = loop.run_until_complete(parse_prayer_times('https://namazvakitleri.diyanet.gov.tr/en-US'))
    pprint.pprint(results)