In [None]:
#Application of frictionless principles to the relevant datafile
#output is JSON file describing the relevant datafile

#Please download frictionless at frictionlessdata.io/software/#software-toolkit

In [14]:
#import frictionless packages
import frictionless
from frictionless import describe

import pprint
pp = pprint.PrettyPrinter(depth=4)

In [15]:
#'describe' datasets and print
TX_HPD = describe('https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_History-Photographic-Dataset_Items/TX-History-Photographic-Dataset_Items.csv')

pp.pprint(TX_HPD)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-history-photographic-dataset_items',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_History-Photographic-Dataset_Items/TX-History-Photographic-Dataset_Items.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'name': 'County', 'type': 'string'},
                       {'name': 'state', 'type': 'string'},
                       {'name': 'year_start', 'type': 'integer'},
                       {'name': 'year-end', 'type': 'integer'},
                       {'name': 'count', 'type': 'integer'}]},
 'scheme': 'https'}


In [19]:
#applying more detail to each column
TX_HPD.schema.get_field("County").title = "County"
TX_HPD.schema.get_field("County").description = "Texas county name"

TX_HPD.schema.get_field("state").title = "State, Texas"
TX_HPD.schema.get_field("state").description = "Indicator for State - this dataset is only relevant to Texas"

TX_HPD.schema.get_field("year_start").title = "Start Year"
TX_HPD.schema.get_field("year_start").description = "Year indicating the beginning of each decade for the corresponding count of items in the relevant county"

TX_HPD.schema.get_field("year-end").title = "End Year"
TX_HPD.schema.get_field("year-end").description = "Year indicating the end of each decade for the corresponding count of items in the relevant county"

TX_HPD.schema.get_field("count").title = "Items Count"
TX_HPD.schema.get_field("count").description = "Number of photographic items released in each county within the specified decade, regardless of source, according to the Texas History Photographic Dataset (THPD)"


pp.pprint(TX_HPD)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-history-photographic-dataset_items',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_History-Photographic-Dataset_Items/TX-History-Photographic-Dataset_Items.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
                        'name': 'state',
                        'title': 'State, Texas',
                        'type': 'string'},
                       {'description': 'Year indicating the beginning of each '
                                       'decade for the corresponding count of '
                 

In [20]:
#handling missing values
TX_HPD.schema.missing_values = [""]
pp.pprint(TX_HPD)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-history-photographic-dataset_items',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_History-Photographic-Dataset_Items/TX-History-Photographic-Dataset_Items.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
                        'name': 'state',
                        'title': 'State, Texas',
                        'type': 'string'},
                       {'description': 'Year indicating the beginning of each '
                                       'decade for the corresponding count of '
                 

In [21]:
#apply constraint field descriptors to the tables
TX_HPD.schema.get_field("state").constraints["enum"] = ['TX']

pp.pprint(TX_HPD)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-history-photographic-dataset_items',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_History-Photographic-Dataset_Items/TX-History-Photographic-Dataset_Items.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
                        'name': 'state',
                        'title': 'State, Texas',
                        'type': 'string'},
                       {'description': 'Year indicating the beginning of each '
                                       'decade for the corresponding count of '
                 

In [22]:
#add table description
TX_HPD.description = "This dataset contains the number of photographic items released for each county within the decade specified by the start year and end year columns, regardless of source, according to the Texas History Photographic Dataset (THPD)"
pp.pprint(TX_HPD)

{'description': 'This dataset contains the number of photographic items '
                'released for each county within the decade specified by the '
                'start year and end year columns, regardless of source, '
                'according to the Texas History Photographic Dataset (THPD)',
 'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-history-photographic-dataset_items',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_History-Photographic-Dataset_Items/TX-History-Photographic-Dataset_Items.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'constraints': {...},
                        'description': 'Indicator for State - this dataset is '
                                       'only relevant to Texas',
     

In [24]:
#print JSON file
TX_HPD.to_json("tableschema.json")

'{\n  "path": "https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_History-Photographic-Dataset_Items/TX-History-Photographic-Dataset_Items.csv",\n  "name": "tx-history-photographic-dataset_items",\n  "profile": "tabular-data-resource",\n  "scheme": "https",\n  "format": "csv",\n  "hashing": "md5",\n  "encoding": "utf-8",\n  "schema": {\n    "fields": [\n      {\n        "name": "County",\n        "type": "string",\n        "title": "County",\n        "description": "Texas county name"\n      },\n      {\n        "name": "state",\n        "type": "string",\n        "title": "State, Texas",\n        "description": "Indicator for State - this dataset is only relevant to Texas",\n        "constraints": {\n          "enum": [\n            "TX"\n          ]\n        }\n      },\n      {\n        "name": "year_start",\n        "type": "integer",\n        "title": "Start Year",\n        "description": "Year indicating the beginning of each decade for the corresponding 

In [25]:
ls

 Volume in drive C has no label.
 Volume Serial Number is 3A43-426E

 Directory of C:\Users\Dream Machine\Desktop\LEADING20211018\TX_Digital-Newspaper-Program_Issues

11/03/2021  06:37 PM    <DIR>          .
11/03/2021  06:37 PM    <DIR>          ..
11/03/2021  06:26 PM    <DIR>          .ipynb_checkpoints
11/03/2021  06:37 PM            20,942 Frictionless-App-Script_TX-Digital-Newspaper-Program_Issues.ipynb
11/03/2021  06:38 PM             1,879 tableschema.json
               2 File(s)         22,821 bytes
               3 Dir(s)  50,568,605,696 bytes free
