Skip to content

Commit

Permalink
Update documentation: Datamodels, architecture and endpoints (#181)
Browse files Browse the repository at this point in the history
* Update SurveyAnswer to move to ModelForm

* Update SurveyAnswer to move to ModelForm

* Fix test import

* Updated datamodels documentation

* Updated datamodels documentation

* Update architechture and endpoint documentation
  • Loading branch information
JPMartinezClaeys committed May 23, 2024
1 parent 13b401d commit 773b96c
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 42 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 5.0.4 on 2024-05-22 22:13

import django.core.validators
from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter two fields of the ``surveyresponse`` model.

    ``transit_improvement_open`` becomes a nullable ``CharField`` of at most
    128 characters, and ``trip_time`` becomes a nullable
    ``PositiveIntegerField`` validated to lie between 1 and 240.
    """

    # Applied after migration 0009 of the same app.
    dependencies = [
        ("route_rangers_api", "0009_changing_survey"),
    ]

    operations = [
        # Allow NULL for the open-ended improvement answer.
        migrations.AlterField(
            model_name="surveyresponse",
            name="transit_improvement_open",
            field=models.CharField(max_length=128, null=True),
        ),
        # Restrict trip_time to non-negative storage and attach 1..240
        # range validators (enforced at validation time, not by the DB).
        migrations.AlterField(
            model_name="surveyresponse",
            name="trip_time",
            field=models.PositiveIntegerField(
                null=True,
                validators=[
                    django.core.validators.MinValueValidator(1),
                    django.core.validators.MaxValueValidator(240),
                ],
            ),
        ),
    ]
7 changes: 5 additions & 2 deletions app/route_rangers_api/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from django.contrib.gis.db import models
from django.core.validators import MinValueValidator, MaxValueValidator
from app.route_rangers_api.utils.city_mapping import (
CITIES_CHOICES,
TRIP_FREQ,
Expand Down Expand Up @@ -255,13 +256,15 @@ class SurveyResponse(models.Model):
# Page 2:
trip_frequency = models.IntegerField(choices=TRIP_FREQ, null=True)
trip_tod = models.IntegerField(choices=TIME_OF_DAY, null=True)
trip_time = models.IntegerField(null=True)
trip_time = models.PositiveIntegerField(
null=True, validators=[MinValueValidator(1), MaxValueValidator(240)]
)
modes_of_transit = models.IntegerField(choices=MODES_OF_TRANSIT, null=True)

# Page 3:
satisfied = models.IntegerField(choices=SATISFIED, null=True)
transit_improvement = models.IntegerField(choices=TRANSIT_IMPROVEMENT, null=True)
transit_improvement_open = models.CharField(max_length=128)
transit_improvement_open = models.CharField(max_length=128, null=True)

# Page 4:
switch_to_transit = models.IntegerField(choices=SWITCH_TO_TRANSIT, null=True)
Expand Down
21 changes: 8 additions & 13 deletions documentation/architecture-and-deployment.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,19 @@ To load the database tables:
1. Make sure you have all of the python dependencies installed
2. Make sure you have all of the database (and other) credentials in your `.env` folder
3. Install [gdal](https://gdal.org/index.html) in your machine
- If working on an Intel chip Mac you can run `brew install gdal`
- TODO update for otherr use cases
- Run `brew install gdal` if using `homebrew`
- If working on an Intel chip Mac, setup should be completed
- If working on an M2 chip, in `app/geodjango/settings.py` you'll need to import from a `.env` file (or set manually) the `GDAL_LIBRARY_PATH` and `GEOS_LIBRARY_PATH` variables. If using `homebrew`, head to `/opt/homebrew/Cellar`, locate the files `libgdal.dylib` and `libgeos_c.dylib`, and set each variable to the full path of the corresponding file.
4. Run:
```
$ cd app
# python -m manage makemigrations
$ python -m manage migrate
```

To test that the setup is correct, we recommend launching the server locally as explained in `Frontend` below.

5. Database tables should be established, you can double check by logging into the database using postico or some other postgres login tool

### Ingestion
Ingestion scripts are not finalized.
The ingestion files are stored in `app/scripts/`. To ingest the data of one file with its default values, run `python -m manage runscript <module_name>` in the terminal from the `app/` directory. Running it in this manner makes use of `django-extensions`, which deals with some of the necessary Django settings, particularly geographic data dependencies.

To get data from cities' GTFS and ingest it to PostGIS database backend, navigate to the
`app` folder and run `python -m manage runscript extract_scheduled_gtfs`.
As of now, it is only a "test" script that runs a process to ingest the transit
stations from Chicago's Metra system; it will be expanded out to ingest more kinds
of data in the coming days.
Some of the files accept additional parameters; in these cases, the command is `python -m manage runscript <module_name> --script-args <arg_1> <arg_2> ...`

### Frontend
To run the webserver locally (again make sure you have dependencies installed and `.env` up to date)
Expand Down
58 changes: 31 additions & 27 deletions documentation/datamodel.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Datamodel prototype

A visual representation of the datamodel can be found in the following [lucidchart link](https://lucid.app/lucidchart/acedfe58-359d-42ba-8dc9-b9421517ead9/edit?invitationId=inv_a9fee266-b5b0-4243-bfa8-ccf7f44afd22&referringApp=slack&page=0_0#)
A visual representation of the datamodel can be found in the following [link](https://lucid.app/lucidchart/acedfe58-359d-42ba-8dc9-b9421517ead9/edit?invitationId=inv_a9fee266-b5b0-4243-bfa8-ccf7f44afd22&referringApp=slack&page=0_0#)

Relationships between tables are specified by the lines connecting multiple tables. The diagram specifies the type of matching expected, where 1:1 represents a one-to-one matching, m:1 many-to-one, and m:m many-to-many.

Expand All @@ -13,11 +13,12 @@ The datamodel is composed of the following tables:
- RidershipStation
- BikeStation
- BikeRidership
- Survey
- SurveyAnswer
- PlannedRoute
- SurveyUser
- SurveyResponse

The **Demographics** table contains demographic information at the Census Tract level, containing information for the following variables: Population, Median Household Income, Means of Transportation to Work, Time of Departure to Go to Work, Travel Time to Work, Vehicles Available and Disability Status. The table is structured in the following way:
## Demographics

The **Demographics** table contains demographic information at the Census Tract level obtained from the American Community Survey, containing information for the following variables: Population, Median Household Income, Means of Transportation to Work, Time of Departure to Go to Work, Travel Time to Work, Vehicles Available and Disability Status. The table is structured in the following way:

| Name | Type | Description |
| ------------------------------ | -------------------- | -----------------------------------------------------|
Expand All @@ -35,9 +36,10 @@ The **Demographics** table contains demographic information at the Census Tract
| work_commute_time_45_60 | integer | Number of people that take between 45 and 60 minutes commuting to work |
| work_commute_time_60_90 | integer | Number of people that take between 60 and 90 minutes commuting to work |
| work_commute_time_over_90 | integer | Number of people that take over 90' commuting to work |
| vehicles_available | integer | Number of vehicles available in census tract |
| geographic_representation | Polygon | Geographic representation of census tract |

## Public Transportation

The public transportation system information from each city is stored in the **TransitStation** and **TransitRoute** tables. TransitStation represents each bus stop and subway station, while TransitRoute represents each bus route, subway line or any other route (e.g. rail line). The **TransitStation** table is structured in the following way:

| Name | Type | Description |
Expand Down Expand Up @@ -96,6 +98,8 @@ The **RidershipStation** stores information of daily ridership data for a Statio

For this table, as a constraint, there must be uniqueness in the combination of the fields station_id and date

## Bike data

The **BikeStation** and **BikeRidership** tables represent the stations and ridership data for publicly available bikes for rent (CitiBikes, Divvy and BIKETOWN). The **BikeStation** table is represented in the following way:

| Name | Type | Description |
Expand All @@ -122,35 +126,35 @@ The **BikeRidership** table is structured in the following way:

For this table, as a constraint, there must be uniqueness in the combination of the fields station_id and date

For the survey information there are two tables that represent the necessary information **Survey** and **SurveyAnswer**. Survey stores every survey that has been deployed in the platform while SurveyAnswer represents a users survey answer.
## Survey

The **Survey** table is represented by the following structure:
For the survey information there are two tables that represent the necessary information: **SurveyUser** and **SurveyResponse**. SurveyUser stores a user id and basic transit information.

The **SurveyUser** table is represented by the following structure:

| Name | Type | Description |
| ----------------- | -------------------- | ----------------------------------------------------------------------------------- |
| id | Primary Key | Identificator autogenerated by Django |
| name | string | Name of the survey |
| created_at | Datetime object | Date the survey was created |
| questionnaire | JSON | JSON object with representation of the different questions and its possible answers |

The **SurveyAnswer** table is structured in the following way:

| Name | Type | Description |
| -------------------- | -------------------- | ----------------------------------------------- |
| id | Primary Key | Identificator autogenerated by Django |
| user_id | string | Id of the user answering the survey |
| user_id | Primary Key | Identificator generated for each user (session) |
| city | string | City of residence of the user |
| survey_id | ForeignKey(Survey) | Survey that the user is answering |
| response_date | Datetime object | Date and time of when the answer was submitted |
| answer | JSON | JSON object containing the users answers |
| frequent_transit | Bool | True if the user answered that they use transit frequently, False otherwise |
| car_owner | Bool | True if the user answered that they own a car, False otherwise |

For the "Plan Your Route" feature, the results are stored in the **PlannedRoute** table.

The **PlannedRoute** table is structured in the following way:
The **SurveyResponse** table represents each response to the survey, associated with one submitted trip. It is structured in the following way:

| Name | Type | Description |
| -------------------- | -------------------- | ----------------------------------------------- |
| id | Primary Key | Identificator autogenerated by Django |
| user_id | string | Id of the user suggesting a route |
| response_date | Datetime object | Date and time of when the answer was submitted |
| route | LineString | Geoetric representation of the planned route |
| user_id | ForeignKey(SurveyUser) | User associated with the response |
| city | string | City of residence of the user |
| route | LineString | Geometric representation of the submitted route |
| starting_point | Point | Starting point of the submitted route |
| end_point | Point | Ending point of the submitted route |
| trip_frequency | Integer | Frequency of how often a user takes the submitted route |
| trip_tod | Integer | Time of day of when the user takes the submitted route |
| trip_time | Integer | Time it takes the user to complete the submitted route |
| modes_of_transit | Integer | Mode of transportation used to take the submitted route |
| satisfied | Integer | How satisfied the user is with the submitted route |
| transit_improvement | Integer | Choice of how to improve the submitted route |
| transit_improvement_open | string | Open answer on how to improve the submitted route |
| switch_to_transit | Integer | Factor that would make a user switch to transit |
3 changes: 3 additions & 0 deletions documentation/endpoints.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ These are the endpoints that users will engage with via the web app. Since we ar
* `/survey/<city>/`
* returns: survey form for riders to fill out and map to provide routes they would use

* `/about/`
* returns: description of the project and listing of project members with cute pictures


## Backend Endpoints (likely not to be implemented for now)
The following routes are RESTful routes scoped for if the web app was built/deployed separately from the django app and was ingesting the data via WebAPI routes instead of directly from views (the way it is now)
Expand Down
144 changes: 144 additions & 0 deletions tests/test_extract_census_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import os
import sys
import pytest
from unittest.mock import patch, mock_open
from app.scripts.ingest_census_data import (
valid_command_line_arg,
get_census_data,
store_census_data,
city_fips,
)


@pytest.fixture
def supported_cities():
    """Provide the list of city names that are keys of ``city_fips``."""
    return [*city_fips.keys()]


########################################################################################
# COMMAND LINE TESTS
########################################################################################


def test_valid_command_line_arg_correct_input(monkeypatch, supported_cities):
    """A single supported city given on the command line is returned."""
    fake_argv = ["extract_census_data.py", "portland"]
    monkeypatch.setattr("sys.argv", fake_argv)

    chosen_city = valid_command_line_arg(supported_cities)

    assert chosen_city == "portland"


def test_valid_command_line_arg_incorrect_input(monkeypatch, supported_cities):
    """An unsupported city exits with a message listing the valid options."""
    monkeypatch.setattr("sys.argv", ["extract_census_data.py", "miami"])

    with pytest.raises(SystemExit) as excinfo:
        valid_command_line_arg(supported_cities)

    # Checked after the context manager exits so the assertion is reachable.
    expected_message = "Unsupported city. Available options: nyc, chicago, portland"
    assert expected_message == str(excinfo.value)


@pytest.mark.parametrize(
    "argv",
    [
        ["extract_census_data.py"],
        ["extract_census_data.py", "nyc", "miami"],
    ],
)
def test_valid_command_line_arg_incorrect_number_inputs(
    monkeypatch, supported_cities, argv
):
    """Zero or two city arguments exit with a usage hint."""
    monkeypatch.setattr("sys.argv", argv)

    with pytest.raises(SystemExit) as excinfo:
        valid_command_line_arg(supported_cities)

    usage_hint = "Retype command as 'python3 extract_census_data.py <city_name>'"
    assert usage_hint == str(excinfo.value)


########################################################################################
# DATA SCRAPING TESTS
########################################################################################


@patch("requests.get")
def test_get_census_data_successful_api_call(mock_get):
    """A 200 response yields one dict per data row, keyed by the header row."""
    header = ["state_code", "county_code", "B01001_001E", "block_group"]
    rows = [
        ["36", "061", "1000", "1"],
        ["36", "047", "2000", "2"],
    ]
    # Build the expectation up front so it cannot be affected by the call.
    expected = [dict(zip(header, row)) for row in rows]

    response = mock_get.return_value
    response.status_code = 200
    response.json.return_value = [header, *rows]

    actual = get_census_data({"B01001_001E": "total_population"}, "36", "061")

    assert actual == expected


@pytest.mark.parametrize("status_code", [400, 401, 404, 429, 500, 503])
@patch("requests.get")
def test_get_census_data_api_failure(mock_get, status_code):
    """Each of the listed error statuses results in an empty list."""
    failed_response = mock_get.return_value
    failed_response.status_code = status_code
    failed_response.json.return_value = {"error": "Something went wrong"}

    result = get_census_data({"B01001_001E": "total_population"}, "36", "061")

    assert result == []


########################################################################################
# DATA STORING TESTS
########################################################################################


@pytest.fixture
def mock_env(monkeypatch):
    """Replace ``os.getcwd`` and ``sys.argv`` with fixed fake values."""

    def _fake_getcwd():
        return "/fake/dir"

    monkeypatch.setattr(os, "getcwd", _fake_getcwd)
    monkeypatch.setattr(sys, "argv", ["script.py", "nyc"])


def test_store_census_data_with_valid_data(mock_env):
    """Complete rows are written via a single ``open`` of the nyc CSV path.

    The file is expected at ``/fake/dir/nyc_data.csv`` (the patched working
    directory) and more than two ``write`` calls must reach the handle.
    """
    records = [
        dict(
            state_code="36",
            county_code="061",
            total_population="1000",
            block_group="1",
        ),
        dict(
            state_code="36",
            county_code="047",
            total_population="2000",
            block_group="2",
        ),
    ]
    opener = mock_open(read_data="data")

    with patch("builtins.open", opener, create=True) as mock_file:
        store_census_data(records, "nyc")

    mock_file.assert_called_once_with("/fake/dir/nyc_data.csv", "w", newline="")
    # Calling the mock returns the shared file handle used inside the block.
    write_calls = mock_file().write.call_count
    assert write_calls > 2


def test_store_census_data_with_invalid_data(mock_env):
    """Rows with missing fields still produce exactly three ``write`` calls.

    The output file is still opened once at ``/fake/dir/nyc_data.csv``.
    """
    partial_rows = [dict(state_code="36"), dict(county_code="047")]
    opener = mock_open(read_data="data")

    with patch("builtins.open", opener, create=True) as mock_file:
        store_census_data(partial_rows, "nyc")

    mock_file.assert_called_once_with("/fake/dir/nyc_data.csv", "w", newline="")
    # Calling the mock returns the shared file handle used inside the block.
    write_calls = mock_file().write.call_count
    assert write_calls == 3

0 comments on commit 773b96c

Please sign in to comment.