Cocopops16 2025-03-23 13:40:47 +01:00
parent 824e1cfae1
commit 2ffa4ea4d8
3 changed files with 91 additions and 0 deletions

Dockerfile (Normal file, 15 lines)

@@ -0,0 +1,15 @@
FROM python:3.9-alpine
WORKDIR /app
# Toolchain and headers needed when the PostgreSQL drivers (asyncpg,
# psycopg2) have to be built from source on musl-based Alpine
RUN apk add --no-cache gcc musl-dev libffi-dev postgresql-dev
# Install dependencies before copying the code so this layer stays cached
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt
COPY main.py /app/
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

main.py (Normal file, 69 lines)

@@ -0,0 +1,69 @@
from fastapi import FastAPI, BackgroundTasks, Depends
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy import Column, Integer, String, DateTime, text
import aiohttp
from bs4 import BeautifulSoup
from datetime import datetime
import os

# Read the database secrets from environment variables
DATABASE_URL = os.getenv("DB_URL")
DATABASE_USERNAME = os.getenv("DB_USERNAME")
DATABASE_PASSWORD = os.getenv("DB_PASSWORD")
DATABASE_NAME = os.getenv("DB_NAME")

# Build the full database connection URL
DATABASE_URL_FULL = f"postgresql+asyncpg://{DATABASE_USERNAME}:{DATABASE_PASSWORD}@{DATABASE_URL}/{DATABASE_NAME}"

engine = create_async_engine(DATABASE_URL_FULL, echo=True)
AsyncSessionLocal = sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False)
Base = declarative_base()

app = FastAPI()


class Loi(Base):
    __tablename__ = "lois"

    id = Column(Integer, primary_key=True, index=True)
    titre = Column(String, index=True)
    url = Column(String, unique=True)
    date = Column(DateTime, default=datetime.utcnow)


async def get_db():
    async with AsyncSessionLocal() as session:
        yield session


@app.on_event("startup")
async def startup():
    async with engine.begin() as conn:
        await conn.run_sync(Base.metadata.create_all)


async def scrape_legifrance():
    url = "https://www.legifrance.gouv.fr/jorf/"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            html = await response.text()
    soup = BeautifulSoup(html, "html.parser")
    lois = []
    for article in soup.select(".mainListJorf .titleArticle"):
        titre = article.get_text(strip=True)
        lien = article.find("a")
        if lien is None:  # skip entries without a link
            continue
        full_url = f"https://www.legifrance.gouv.fr{lien['href']}"
        lois.append((titre, full_url))
    # Open a dedicated session here: the request-scoped session from
    # Depends(get_db) is already closed by the time background tasks run
    async with AsyncSessionLocal() as db:
        for titre, full_url in lois:
            # Raw SQL must be wrapped in text() under SQLAlchemy 1.4+/2.x
            existing = await db.execute(
                text("SELECT id FROM lois WHERE url = :url"), {"url": full_url}
            )
            if existing.fetchone() is None:
                db.add(Loi(titre=titre, url=full_url))
        await db.commit()


@app.get("/scrape")
async def trigger_scrape(background_tasks: BackgroundTasks):
    background_tasks.add_task(scrape_legifrance)
    return {"message": "Scraping in progress"}


@app.get("/lois")
async def list_lois(db: AsyncSession = Depends(get_db)):
    result = await db.execute(text("SELECT * FROM lois ORDER BY date DESC LIMIT 10"))
    # mappings() yields dict-like rows that FastAPI can serialize to JSON
    return result.mappings().all()
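The duplicate check in scrape_legifrance issues one raw SELECT per scraped row. A more idiomatic alternative under SQLAlchemy 2.x is a single PostgreSQL upsert through the dialect's insert() construct, letting the unique constraint on Loi.url discard duplicates. This is a sketch against the Loi model above, not part of the commit, and the helper name save_lois is made up:

from sqlalchemy.dialects.postgresql import insert

async def save_lois(lois: list[tuple[str, str]]):  # hypothetical helper, not in the commit
    async with AsyncSessionLocal() as db:
        if lois:  # executemany with an empty list would error out
            # One statement for the whole batch; rows whose url already
            # exists are skipped via ON CONFLICT DO NOTHING
            stmt = insert(Loi).on_conflict_do_nothing(index_elements=["url"])
            await db.execute(stmt, [{"titre": t, "url": u} for t, u in lois])
            await db.commit()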

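Once the app is running, both endpoints can be exercised with a small aiohttp client (aiohttp is already in requirements.txt). The localhost URL and the sleep duration below are assumptions; /scrape only enqueues the background task, so the client has to wait before reading results back:

import asyncio
import aiohttp

async def main():
    async with aiohttp.ClientSession() as session:
        # Enqueue the background scrape
        async with session.get("http://localhost:8000/scrape") as r:
            print(await r.json())  # {"message": "Scraping in progress"}
        await asyncio.sleep(10)  # crude wait for the background task to finish
        # Read back the ten most recent rows
        async with session.get("http://localhost:8000/lois") as r:
            print(await r.json())

asyncio.run(main())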
requirements.txt (Normal file, 7 lines)

@@ -0,0 +1,7 @@
fastapi
uvicorn
aiohttp
beautifulsoup4
sqlalchemy
asyncpg
# not used by the asyncpg engine in main.py; only needed for a synchronous driver
psycopg2-binary
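None of these packages are pinned, so a fresh image build pulls the latest releases; in particular it pulls SQLAlchemy 2.x, which is why the raw SQL in main.py must go through text(). A possible pin set for reproducible builds, with the exact versions below being illustrative and worth checking against a real build:

fastapi==0.110.0
uvicorn==0.27.1
aiohttp==3.9.3
beautifulsoup4==4.12.3
sqlalchemy==2.0.28
asyncpg==0.29.0
psycopg2-binary==2.9.9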