Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: Tests

on:
pull_request:
# Only run for pull requests that target the main branch
branches: [main]

jobs:
lint:
runs-on: [self-hosted, cluster.loopingz.com]
steps:
- uses: actions/checkout@v2
# black formatting
- uses: psf/black@stable
with:
options: "--line-length 100 --check --diff"
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,6 @@ cython_debug/
marimo/_static/
marimo/_lsp/
__marimo__/

# Configuration
config.yml
9 changes: 9 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
},

"black-formatter.args": ["--line-length", "100"],
"pylint.args": ["--disable=W0401, W0614, W1514"]
}
22 changes: 21 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,22 @@
# modalshift
# MODALSHIFT

Open developments for MODALSHIFT EU project

## Configuration file

Put a file named `config.yml` at the root of the repository.

Here is an example configuration.

```yaml
S3_server: https://url
S3_id_env_name: S3_ID
S3_secret_env_name: S3_SECRET
output_directory: data/
download_file: path/of/the/object/in/the/bucket
output_file: local_file_name
```

## Download a file

```bash
python3 src/get_data_from_S3.py
```
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
black~=25.1.0
tqdm==4.66.4
botocore==1.41.0
boto3==1.41.0
PyYAML~=6.0
91 changes: 91 additions & 0 deletions src/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
Shared configuration, S3 client and download helpers
"""

import os
import boto3
import botocore
import tqdm
import yaml

# Load the configuration.
# config.yml lives at the repository root, one level above this file's directory.
# Resolving it from __file__ (instead of the relative "../config.yml") makes the
# scripts work whatever the current working directory is, e.g. when launched from
# the repository root with `python3 src/get_data_from_S3.py`.
_CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, "config.yml")

with open(_CONFIG_PATH, "r", encoding="utf-8") as ymlfile:
    # safe_load only builds plain Python objects, which is all a config file needs.
    config = yaml.safe_load(ymlfile)

# Required configuration keys; a missing key raises KeyError at import time.
S3_SERVER = config["S3_server"]
S3_ID_ENV_NAME = config["S3_id_env_name"]
S3_SECRET_ENV_NAME = config["S3_secret_env_name"]
OUTPUT_DIRECTORY = config["output_directory"]
DOWNLOAD_FILE = config["download_file"]
OUTPUT_FILE = config["output_file"]

# Create the output directory, including missing parents; no error if it already exists.
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

# Init the S3 session.
session = boto3.Session()

# The credentials are read from the environment variables whose *names* are given in
# the configuration file, so that no secret is stored in config.yml itself.
S3_CLIENT = session.client(
    service_name="s3",
    aws_access_key_id=os.environ[S3_ID_ENV_NAME],
    aws_secret_access_key=os.environ[S3_SECRET_ENV_NAME],
    endpoint_url=S3_SERVER,
)


def download_object_from_s3_with_progress(client, *, bucket, key, version_id=None, filename):
    """
    Fetch one S3 object into a local file while displaying a tqdm progress bar.

    Adapted from https://alexwlchan.net/2021/04/s3-progress-bars/

    Parameters (all but ``client`` are keyword-only):
        client: S3 client exposing ``head_object`` and ``download_file``.
        bucket: name of the bucket holding the object.
        key: key of the object inside the bucket.
        version_id: optional object version; when given it is forwarded to both the
            size lookup and the download.
        filename: local path the object is written to; also used as the bar label.
    """
    # A HeadObject request gives the total size the progress bar counts up to.
    # The object could in theory change size before the download starts, but that
    # would only make the bar inaccurate, not the download.
    head_kwargs = {"Bucket": bucket, "Key": key}
    extra_args = None
    if version_id is not None:
        head_kwargs["VersionId"] = version_id
        extra_args = {"VersionId": version_id}

    total_bytes = client.head_object(**head_kwargs)["ContentLength"]

    # tqdm is driven manually through the ``with`` statement
    # (see https://pypi.org/project/tqdm/#manual):
    #   * ``unit_scale=True`` makes tqdm use SI prefixes and ``unit="B"`` appends a "B"
    #     suffix, giving rates such as "14.5kB/s" (the "B" is only a label for tqdm);
    #   * the download callback receives the number of bytes transferred and is used
    #     to advance the bar.
    with tqdm.tqdm(total=total_bytes, unit="B", unit_scale=True, desc=filename) as progress:

        def _advance(bytes_transferred):
            return progress.update(bytes_transferred)

        download_object_from_s3(client, bucket, key, filename, extra_args, _advance)


def download_object_from_s3(client, bucket, key, filename, extraArgs=None, callback=None):
    """
    Download one S3 object to a local file.

    Thin wrapper that forwards every argument, by keyword, to ``client.download_file``.

    Parameters:
        client: S3 client exposing ``download_file``.
        bucket: name of the bucket holding the object.
        key: key of the object inside the bucket.
        filename: local path the object is written to.
        extraArgs: optional extra arguments passed as ``ExtraArgs``.
        callback: optional callable passed as ``Callback``.
    """
    transfer_kwargs = {
        "Bucket": bucket,
        "Key": key,
        "Filename": filename,
        "ExtraArgs": extraArgs,
        "Callback": callback,
    }
    client.download_file(**transfer_kwargs)
26 changes: 26 additions & 0 deletions src/get_data_from_S3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Get data from S3
"""

# %%
# imports

from functions import (
download_object_from_s3_with_progress,
OUTPUT_DIRECTORY,
S3_CLIENT,
DOWNLOAD_FILE,
OUTPUT_FILE,
)

# %%
# download

# Local destination of the downloaded object, inside the configured output directory.
destination = f"{OUTPUT_DIRECTORY}/{OUTPUT_FILE}"

download_object_from_s3_with_progress(
    S3_CLIENT, bucket="tellae-mobility-bots", key=DOWNLOAD_FILE, filename=destination
)

# %%
80 changes: 80 additions & 0 deletions src/list_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
List folders, count files and download sample files from S3
"""

# %%
# imports

from functions import (
OUTPUT_DIRECTORY,
S3_CLIENT,
botocore,
download_object_from_s3_with_progress,
)


# %%
#

# %%
# test listing folders and counting files


def list_folder_objects(client, bucket, prefix):
    """
    Return the keys of all objects in ``bucket`` whose key starts with ``prefix``.

    Parameters:
        client: S3 client exposing ``list_objects``.
        bucket: name of the bucket to list.
        prefix: key prefix to filter on.

    Returns:
        A list of object keys, in the order the service returns them. The list is
        empty when no object matches the prefix.
    """
    file_list = []
    request = {"Bucket": bucket, "Prefix": prefix}

    while True:
        response = client.list_objects(**request)

        # "Contents" is absent from the response when nothing matches the prefix.
        contents = response.get("Contents", [])
        file_list.extend(entry["Key"] for entry in contents)

        # A single call returns a limited page of keys; keep going while the
        # response is flagged as truncated (guarding against an empty page).
        if not response.get("IsTruncated") or not contents:
            return file_list

        # Without a Delimiter the response carries no NextMarker, in which case the
        # last key of the page is the marker of the next one.
        request["Marker"] = response.get("NextMarker") or contents[-1]["Key"]


bucket = "tellae-mobility-bots"

# Each prefix must end with a trailing "/".
prefix_list = [
    "GTFS-RT/renfe/",
    "gtfs/renfe-cercanias/",
    "gtfs/",
    "gbfs/",
    "GTFS-RT/bibus/",
]

# For every prefix: print its direct sub folders (if any), then the number of
# objects found under the prefix. S3 client errors are printed, not raised.
for prefix in prefix_list:
    print("******", prefix, "******")
    try:
        result = S3_CLIENT.list_objects(Bucket=bucket, Prefix=prefix, Delimiter="/")

        common_prefixes = result.get("CommonPrefixes")
        if common_prefixes is None:
            print("no sub folder")
        else:
            for sub_folder in common_prefixes:
                print("sub folder : ", sub_folder.get("Prefix"))

        # The file count is printed in both cases, after the sub folder report.
        print("# files: ", len(list_folder_objects(S3_CLIENT, bucket, prefix)))

    except botocore.exceptions.ClientError as error:
        print(error)

# %%
# test downloading files

# Sample object keys used to check that downloading works.
files = [
    "gtfs/madrid-metro-ligero/gtfs_madrid-metro-ligero_2025-11-11.zip",
    "GTFS-RT/renfe/bus_position_20250907_renfe.csv.bz2",
    "gbfs/auray/2025-02-02_auray.csv.bz2",
    "GTFS-RT/bibus/bus_position_20240102_bibus.csv.bz2",
]

for file in files:
    print("*******", file, "******")

    # Flatten the object key into a single file name inside the output directory.
    local_name = file.replace("/", "_")
    target = f"{OUTPUT_DIRECTORY}/{local_name}"

    try:
        download_object_from_s3_with_progress(
            client=S3_CLIENT, bucket=bucket, key=file, filename=target
        )
    except botocore.exceptions.ClientError as error:
        # Report the failure and carry on with the next file.
        print(error)

# %%
#
Loading