Update documentation: Datamodels, architecture and endpoints (#181)

* Update SurveyAnswer to move to ModelForm * Update SurveyAnswer to move to ModelForm * Fix test import * Updated datamodels documentation * Updated datamodels documentation * Update architechture and endpoint documentation
uchicago-capp-30320 · May 23, 2024 · 773b96c · 773b96c
1 parent 13b401d
commit 773b96c
Show file tree

Hide file tree

Showing 6 changed files with 221 additions and 42 deletions.
diff --git a/...ute_rangers_api/migrations/0010_alter_surveyresponse_transit_improvement_open_and_more.py b/...ute_rangers_api/migrations/0010_alter_surveyresponse_transit_improvement_open_and_more.py
@@ -0,0 +1,30 @@
+# Generated by Django 5.0.4 on 2024-05-22 22:13
+
+import django.core.validators
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("route_rangers_api", "0009_changing_survey"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="surveyresponse",
+            name="transit_improvement_open",
+            field=models.CharField(max_length=128, null=True),
+        ),
+        migrations.AlterField(
+            model_name="surveyresponse",
+            name="trip_time",
+            field=models.PositiveIntegerField(
+                null=True,
+                validators=[
+                    django.core.validators.MinValueValidator(1),
+                    django.core.validators.MaxValueValidator(240),
+                ],
+            ),
+        ),
+    ]
diff --git a/app/route_rangers_api/models.py b/app/route_rangers_api/models.py
@@ -1,4 +1,5 @@
 from django.contrib.gis.db import models
+from django.core.validators import MinValueValidator, MaxValueValidator
 from app.route_rangers_api.utils.city_mapping import (
     CITIES_CHOICES,
     TRIP_FREQ,
@@ -255,13 +256,15 @@ class SurveyResponse(models.Model):
     # Page 2:
     trip_frequency = models.IntegerField(choices=TRIP_FREQ, null=True)
     trip_tod = models.IntegerField(choices=TIME_OF_DAY, null=True)
-    trip_time = models.IntegerField(null=True)
+    trip_time = models.PositiveIntegerField(
+        null=True, validators=[MinValueValidator(1), MaxValueValidator(240)]
+    )
     modes_of_transit = models.IntegerField(choices=MODES_OF_TRANSIT, null=True)
 
     # Page 3:
     satisfied = models.IntegerField(choices=SATISFIED, null=True)
     transit_improvement = models.IntegerField(choices=TRANSIT_IMPROVEMENT, null=True)
-    transit_improvement_open = models.CharField(max_length=128)
+    transit_improvement_open = models.CharField(max_length=128, null=True)
 
     # Page 4:
     switch_to_transit = models.IntegerField(choices=SWITCH_TO_TRANSIT, null=True)

diff --git a/documentation/architecture-and-deployment.md b/documentation/architecture-and-deployment.md
@@ -18,24 +18,19 @@ To load the database tables:
 1. Make sure you have all of the python dependencies installed
 2. Make sure you have all of the database (and other) credentials in your `.env` file
 3. Install [gdal](https://gdal.org/index.html) in your machine
-    - If working on an Intel chip Mac you can run `brew install gdal`
-    - TODO update for otherr use cases
+    - Run `brew install gdal` if using `homebrew`
+    - If working on an Intel chip Mac, setup should be completed
+    - If working on an M2 chip, in `app/geodjango/settings.py` you'll need to set the `GDAL_LIBRARY_PATH` and `GEOS_LIBRARY_PATH` variables, either by importing them from a `.env` file or manually. If using `homebrew`, head to `/opt/homebrew/Cellar`, locate the files `libgdal.dylib` and `libgeos_c.dylib`, and set each variable to the full path of the corresponding file.
 4. Run:
-```
-$ cd app
-# python -m manage makemigrations
-$ python -m manage migrate
-```
+
+To test that the setup is correct, we recommend launching the server locally as explained in `Frontend` below.
+
 5. Database tables should be established, you can double check by logging into the database using postico or some other postgres login tool
 
 ### Ingestion
-Ingestion scripts are not finalized.
+The ingestion scripts are stored in `app/scripts/`. To ingest the data handled by one script with its default values, run `python -m manage runscript <module_name>` in a terminal from the `app/` directory. Running it this way goes through `django-extensions`, which takes care of some of the necessary Django settings, particularly the geographic data dependencies.
 
-To get data from cities' GTFS and ingest it to PostGIS database backend, navigate to the
-`app` folder and run `python -m manage runscript extract_scheduled_gtfs`.
-As of now, it is only a "test" script that runs a process to ingest the transit
-stations from Chicago's Metra system; it will be expanded out to ingest more kinds
-of data in the coming days.
+Some of the scripts accept additional parameters; in those cases, the command is `python -m manage runscript <module_name> --script-args <arg_1> <arg_2> ...`
 
 ### Frontend
 To run the webserver locally (again make sure you have dependencies installed and `.env` up to date)

diff --git a/documentation/datamodel.md b/documentation/datamodel.md
@@ -1,6 +1,6 @@
 # Datamodel prototype 
 
-A visual representation of the datamodel can be found in the following [lucidchart link](https://lucid.app/lucidchart/acedfe58-359d-42ba-8dc9-b9421517ead9/edit?invitationId=inv_a9fee266-b5b0-4243-bfa8-ccf7f44afd22&referringApp=slack&page=0_0#)
+A visual representation of the datamodel can be found in the following [link](https://lucid.app/lucidchart/acedfe58-359d-42ba-8dc9-b9421517ead9/edit?invitationId=inv_a9fee266-b5b0-4243-bfa8-ccf7f44afd22&referringApp=slack&page=0_0#)
 
 Relationships between tables are represented by the lines connecting them. The diagram specifies the type of matching expected for each relationship, where 1:1 represents a one-to-one matching, m:1 many-to-one, and m:m many-to-many.
 
@@ -13,11 +13,12 @@ The datamodel is composed of the following tables:
 - RidershipStation
 - BikeStation
 - BikeRidership
-- Survey
-- SurveyAnswer
-- PlannedRoute
+- SurveyUser
+- SurveyResponse
 
-The **Demographics** table contains demographic information at the Census Tract level, containing information for the following variables: Population, Median Household Income, Means of Transportation to Work, Time of Departure to Go to Work, Travel Time to Work, Vehicles Available and Disability Status. The table is structured in the following way: 
+## Demographics
+
+The **Demographics** table contains demographic information at the Census Tract level obtained from the American Community Survey, containing information for the following variables: Population, Median Household Income, Means of Transportation to Work, Time of Departure to Go to Work, Travel Time to Work, Vehicles Available and Disability Status. The table is structured in the following way: 
 
 | Name                           | Type                 | Description                                          |
 | ------------------------------ | -------------------- | -----------------------------------------------------|
@@ -35,9 +36,10 @@ The **Demographics** table contains demographic information at the Census Tract
 | work_commute_time_45_60              | integer              | Number of people that take between 45 and 60 minutes commuting to work |
 | work_commute_time_60_90              | integer              | Number of people that take between 60 and 90 minutes commuting to work |
 | work_commute_time_over_90              | integer              | Number of people that take over 90' commuting to work  |
-| vehicles_available             | integer              | Number of vehicles available in census tract         |
 | geographic_representation             | Polygon              | Geographic representation of census tract        |
 
+## Public Transportation
+
 The public transportation system information from each city is stored in the **TransitStation** and **TransitRoute** tables. TransitStation represents each bus stop and subway station, while TransitRoute represents each bus route, subway line or any other route (e.g. a rail line). The **TransitStation** table is structured in the following way:
 
 | Name                | Type                 | Description                                          |
@@ -96,6 +98,8 @@ The **RidershipStation** stores information of daily ridership data for a Statio
 
 For this table, as a constraint, there must be uniqueness in the combination of the fields station_id and date
 
+## Bike data
+
 The **BikeStation** and **BikeRidership** tables represent the stations and ridership data for publicly available bikes for rent (CitiBikes, Divvy and BIKETOWN). The **BikeStation** table is represented in the following way:
 
 | Name              | Type                 | Description                                          |
@@ -122,35 +126,35 @@ The **BikeRidership** table is structured in the following way:
 
 For this table, as a constraint, there must be uniqueness in the combination of the fields station_id and date
 
-For the survey information there are two tables that represent the necessary information **Survey** and **SurveyAnswer**. Survey stores every survey that has been deployed in the platform while SurveyAnswer represents a users survey answer.
+## Survey
 
-The **Survey** table is represented by the following structure:
+The survey information is stored in two tables: **SurveyUser** and **SurveyResponse**. SurveyUser stores a user id and basic transit information about that user.
+
+The **SurveyUser** table is represented by the following structure:
 
 | Name              | Type                 | Description                                                                         |
 | ----------------- | -------------------- | ----------------------------------------------------------------------------------- |
-| id                | Primary Key             | Identificator autogenerated by Django        | 
-| name              | string               | Name of the survey                                                                  |
-| created_at        | Datetime object      | Date the survey was created                                                         |
-| questionnaire     | JSON                 | JSON object with representation of the different questions and its possible answers |
-
-The **SurveyAnswer** table is structured in the following way:
-
-| Name                 | Type                 | Description                                     |
-| -------------------- | -------------------- | ----------------------------------------------- |
-| id                   | Primary Key          | Identificator autogenerated by Django        | 
-| user_id              | string               | Id of the user answering the survey             |
+| user_id                | Primary Key             | Identifier generated for each user (session)           | 
 | city                 | string               | City of residence of the user                   |
-| survey_id               | ForeignKey(Survey)   | Survey that the user is answering               |
-| response_date        | Datetime object      | Date and time of when the answer was submitted  |
-| answer               | JSON                 | JSON object containing the users answers        |
+| frequent_transit              | Bool               | True if the user reported using transit frequently, False otherwise            |
+| car_owner        | Bool      | True if the user reported owning a car, False otherwise                                                   |
 
-For the "Plan Your Route" feature, the results are stored in the **PlannedRoute** table.
 
-The **PlannedRoute** table is structured in the following way:
+The **SurveyResponse** table stores each survey response, where every response is associated with one submitted trip. It is structured in the following way:
 
 | Name                 | Type                 | Description                                     |
 | -------------------- | -------------------- | ----------------------------------------------- |
 | id                   | Primary Key          | Identifier autogenerated by Django              | 
-| user_id              | string               | Id of the user suggesting a route               |
-| response_date        | Datetime object      | Date and time of when the answer was submitted  |
-| route               | LineString            | Geoetric representation of the planned route     |
+| user_id              | ForeignKey(SurveyUser) | User associated with the response             |
+| city                 | string               | City of residence of the user                   |
+| route                | LineString            | Geometric representation of the submitted route |
+| starting_point       | Point            | Starting point of the submitted route     |
+| end_point            | Point            | Ending point of the submitted route       |
+| trip_frequency       | Integer   | Frequency with which the user takes the submitted route    |
+| trip_tod             | Integer   | Time of day of when the user takes the submitted route     |
+| trip_time             | Integer   | Time it takes the user to complete the submitted route    |
+| modes_of_transit       | Integer   | Mode of transportation used to take the submitted route  |
+| satisfied              | Integer      | Satisfaction level reported by the user (choice from `SATISFIED`) |
+| transit_improvement         | Integer   | Choice of how to improve the submitted route        |
+| transit_improvement_open    | string    | Open answer on how to improve the submitted route (up to 128 characters) |
+| switch_to_transit             | Integer   | Factor that would make a user switch to transit   |
diff --git a/documentation/endpoints.md b/documentation/endpoints.md
@@ -13,6 +13,9 @@ These are the endpoints that users will engage with via the web app. Since we ar
 * `/survey/<city>/`
     * returns: survey form for riders to fill out and map to provide routes they would use
 
+* `/about/`
+    * returns: description of the project and listing of project members with cute pictures
+
 
 ## Backend Endpoints (likely not to be implemented for now)
 The following routes are RESTful routes scoped for if the web app was built/deployed separately from the django app and was ingesting the data via WebAPI routes instead of directly from views (the way it is now)

diff --git a/tests/test_extract_census_data.py b/tests/test_extract_census_data.py
@@ -0,0 +1,144 @@
+import os
+import sys
+import pytest
+from unittest.mock import patch, mock_open
+from app.scripts.ingest_census_data import (
+    valid_command_line_arg,
+    get_census_data,
+    store_census_data,
+    city_fips,
+)
+
+
+@pytest.fixture
+def supported_cities():
+    return list(city_fips.keys())
+
+
+########################################################################################
+# COMMAND LINE TESTS
+########################################################################################
+
+
+def test_valid_command_line_arg_correct_input(monkeypatch, supported_cities):
+    monkeypatch.setattr("sys.argv", ["extract_census_data.py", "portland"])
+    assert valid_command_line_arg(supported_cities) == "portland"
+
+
+def test_valid_command_line_arg_incorrect_input(monkeypatch, supported_cities):
+    monkeypatch.setattr("sys.argv", ["extract_census_data.py", "miami"])
+    with pytest.raises(SystemExit) as excinfo:
+        valid_command_line_arg(supported_cities)
+    assert "Unsupported city. Available options: nyc, chicago, portland" == str(
+        excinfo.value
+    )
+
+
+@pytest.mark.parametrize(
+    "argv", [(["extract_census_data.py"]), (["extract_census_data.py", "nyc", "miami"])]
+)
+def test_valid_command_line_arg_incorrect_number_inputs(
+    monkeypatch, supported_cities, argv
+):
+    monkeypatch.setattr("sys.argv", argv)
+    with pytest.raises(SystemExit) as excinfo:
+        valid_command_line_arg(supported_cities)
+    assert "Retype command as 'python3 extract_census_data.py <city_name>'" == str(
+        excinfo.value
+    )
+
+
+########################################################################################
+# DATA SCRAPING TESTS
+########################################################################################
+
+
+@patch("requests.get")
+def test_get_census_data_successful_api_call(mock_get):
+    mock_data = [
+        ["state_code", "county_code", "B01001_001E", "block_group"],
+        ["36", "061", "1000", "1"],
+        ["36", "047", "2000", "2"],
+    ]
+    mock_get.return_value.status_code = 200
+    mock_get.return_value.json.return_value = mock_data
+
+    expected = [
+        {
+            "state_code": "36",
+            "county_code": "061",
+            "B01001_001E": "1000",
+            "block_group": "1",
+        },
+        {
+            "state_code": "36",
+            "county_code": "047",
+            "B01001_001E": "2000",
+            "block_group": "2",
+        },
+    ]
+    actual = get_census_data({"B01001_001E": "total_population"}, "36", "061")
+    assert actual == expected
+
+
+@pytest.mark.parametrize("status_code", [400, 401, 404, 429, 500, 503])
+@patch("requests.get")
+def test_get_census_data_api_failure(mock_get, status_code):
+    mock_get.return_value.status_code = status_code
+    mock_get.return_value.json.return_value = {"error": "Something went wrong"}
+    assert get_census_data({"B01001_001E": "total_population"}, "36", "061") == []
+
+
+########################################################################################
+# DATA STORING TESTS
+########################################################################################
+
+
+@pytest.fixture
+def mock_env(monkeypatch):
+    monkeypatch.setattr(os, "getcwd", lambda: "/fake/dir")
+    monkeypatch.setattr(sys, "argv", ["script.py", "nyc"])
+
+
+def test_store_census_data_with_valid_data(mock_env):
+    """
+    Test
+    """
+    with patch("builtins.open", mock_open(read_data="data"), create=True) as mock_file:
+        data = [
+            {
+                "state_code": "36",
+                "county_code": "061",
+                "total_population": "1000",
+                "block_group": "1",
+            },
+            {
+                "state_code": "36",
+                "county_code": "047",
+                "total_population": "2000",
+                "block_group": "2",
+            },
+        ]
+        store_census_data(data, "nyc")
+        mock_file.assert_called_once_with("/fake/dir/nyc_data.csv", "w", newline="")
+        handle = mock_file()
+        assert handle.write.call_count > 2
+
+
+def test_store_census_data_with_invalid_data(mock_env):
+    """
+    Test
+    """
+    invalid_data = [
+        {
+            "state_code": "36",
+        },
+        {
+            "county_code": "047",
+        },
+    ]
+    with patch("builtins.open", mock_open(read_data="data"), create=True) as mock_file:
+        store_census_data(invalid_data, "nyc")
+        mock_file.assert_called_once_with("/fake/dir/nyc_data.csv", "w", newline="")
+        handle = mock_file()
+        assert handle.write.call_count == 3