From 2ffa4ea4d82bdf70e416a5dd835bc912b0018b5b Mon Sep 17 00:00:00 2001
From: Cocopops16
Date: Sun, 23 Mar 2025 13:40:47 +0100
Subject: [PATCH] v0.0.1

---
 Dockerfile       | 15 ++++++++++++
 main.py          | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  7 +++++++
 3 files changed, 95 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..75b0016
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.9-alpine
+
+WORKDIR /app
+
+RUN apk add --no-cache gcc musl-dev libffi-dev postgresql-dev
+
+COPY requirements.txt /app/
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY main.py /app/
+
+EXPOSE 8000
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..6e5ca62
--- /dev/null
+++ b/main.py
@@ -0,0 +1,73 @@
+from fastapi import FastAPI, BackgroundTasks, Depends
+from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
+from sqlalchemy.orm import sessionmaker, declarative_base
+from sqlalchemy import Column, Integer, String, DateTime, text
+import aiohttp
+from bs4 import BeautifulSoup
+from datetime import datetime
+import os
+
+# Read the database credentials from the environment (injected as secrets)
+DATABASE_URL = os.getenv("DB_URL")
+DATABASE_USERNAME = os.getenv("DB_USERNAME")
+DATABASE_PASSWORD = os.getenv("DB_PASSWORD")
+DATABASE_NAME = os.getenv("DB_NAME")
+
+# Build the full connection URL for the database
+DATABASE_URL_FULL = f"postgresql+asyncpg://{DATABASE_USERNAME}:{DATABASE_PASSWORD}@{DATABASE_URL}/{DATABASE_NAME}"
+
+engine = create_async_engine(DATABASE_URL_FULL, echo=True)
+AsyncSessionLocal = sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False)
+Base = declarative_base()
+
+app = FastAPI()
+
+class Loi(Base):
+    __tablename__ = "lois"
+    id = Column(Integer, primary_key=True, index=True)
+    titre = Column(String, index=True)
+    url = Column(String, unique=True)
+    date = Column(DateTime, default=datetime.utcnow)
+
+async def get_db():
+    async with AsyncSessionLocal() as session:
+        yield session
+
+@app.on_event("startup")
+async def startup():
+    # Create the tables at startup if they do not exist yet
+    async with engine.begin() as conn:
+        await conn.run_sync(Base.metadata.create_all)
+
+async def scrape_legifrance():
+    url = "https://www.legifrance.gouv.fr/jorf/"
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            html = await response.text()
+
+    soup = BeautifulSoup(html, "html.parser")
+    lois = []
+    for article in soup.select(".mainListJorf .titleArticle"):
+        titre = article.get_text(strip=True)
+        lien = article.find("a")["href"]
+        full_url = f"https://www.legifrance.gouv.fr{lien}"
+        lois.append((titre, full_url))
+
+    # Open a dedicated session here: the request-scoped session from get_db
+    # is already closed by the time a background task runs.
+    async with AsyncSessionLocal() as db:
+        for titre, full_url in lois:
+            existing = await db.execute(text("SELECT id FROM lois WHERE url = :url"), {"url": full_url})
+            if existing.fetchone() is None:
+                db.add(Loi(titre=titre, url=full_url))
+        await db.commit()
+
+@app.get("/scrape")
+async def trigger_scrape(background_tasks: BackgroundTasks):
+    background_tasks.add_task(scrape_legifrance)
+    return {"message": "Scraping in progress"}
+
+@app.get("/lois")
+async def list_lois(db: AsyncSession = Depends(get_db)):
+    result = await db.execute(text("SELECT * FROM lois ORDER BY date DESC LIMIT 10"))
+    return [dict(row) for row in result.mappings().all()]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fad7ad1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+fastapi
+uvicorn
+aiohttp
+beautifulsoup4
+sqlalchemy
+asyncpg
+psycopg2-binary
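
For a quick local check, a minimal client sketch with aiohttp (assumptions: the image is built and running locally with the DB_* environment variables set and port 8000 published, matching the Dockerfile's EXPOSE/CMD; the sleep is an arbitrary wait to let the background scrape finish):

    import asyncio
    import aiohttp

    async def check():
        # Hypothetical local run: host and port taken from the Dockerfile's EXPOSE/CMD.
        async with aiohttp.ClientSession() as session:
            async with session.get("http://localhost:8000/scrape") as resp:
                print(await resp.json())   # {"message": "Scraping in progress"}
            await asyncio.sleep(5)         # give the background task time to write to the "lois" table
            async with session.get("http://localhost:8000/lois") as resp:
                print(await resp.json())   # ten most recent rows

    asyncio.run(check())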