In [None]:
# Apply Frictionless Data principles to the TX-County_Libraries.csv data file.
# Output: a JSON file (tableschema.json) describing that data file.

# Install frictionless before running this notebook; see https://frictionlessdata.io/software/#software-toolkit

In [1]:
#import frictionless packages
import frictionless
from frictionless import describe

import pprint
# Shared pretty-printer for displaying the resource metadata; depth=4 limits
# how many levels of nesting are printed.
pp = pprint.PrettyPrinter(depth=4)

In [2]:
# Let frictionless inspect the remote CSV and build its metadata
# (format, encoding, inferred schema, ...), then display the result.
tx_libraries_csv_url = 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Libraries/TX-County_Libraries.csv'
TX_Lib = describe(tx_libraries_csv_url)

pp.pprint(TX_Lib)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_libraries',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Libraries/TX-County_Libraries.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'name': 'County', 'type': 'string'},
                       {'name': 'Library Name', 'type': 'string'},
                       {'name': 'Population of the Legal Service Area',
                        'type': 'string'},
                       {'name': 'Legal Establishment', 'type': 'string'},
                       {'name': 'Region', 'type': 'string'}]},
 'scheme': 'https'}


In [4]:
# Add a human-readable title and description to every column.
# Each column's title is the same text as its name, so only the
# descriptions need to be spelled out here.
column_descriptions = {
    "County": "Texas county name",
    "Library Name": "Name of libraries in Texas",
    "Population of the Legal Service Area": "Population of the legal service area associated with the relevant library and county",
    "Legal Establishment": "Legal establishment category (e.g., city, county, non-profit)",
    "Region": "Texas region relevant to the library and county",
}

for column_name, column_description in column_descriptions.items():
    column = TX_Lib.schema.get_field(column_name)
    column.title = column_name
    column.description = column_description


pp.pprint(TX_Lib)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_libraries',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Libraries/TX-County_Libraries.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'description': 'Name of libraries in Texas',
                        'name': 'Library Name',
                        'title': 'Library Name',
                        'type': 'string'},
                       {'description': 'Population of the legal service area '
                                       'associated with the relevant library '
                                       'and county',
                        'name': 'Population of the Legal Service Area',
                        'title': 'Population of the Legal Service Area

In [5]:
# Handling missing values: register the empty string as the schema's only
# missing-value marker, then display the updated metadata.
TX_Lib.schema.missing_values = [""]
pp.pprint(TX_Lib)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_libraries',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Libraries/TX-County_Libraries.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'description': 'Name of libraries in Texas',
                        'name': 'Library Name',
                        'title': 'Library Name',
                        'type': 'string'},
                       {'description': 'Population of the legal service area '
                                       'associated with the relevant library '
                                       'and county',
                        'name': 'Population of the Legal Service Area',
                        'title': 'Population of the Legal Service Area

In [6]:
# Attach an "enum" constraint to the categorical columns so each one is
# limited to an explicit list of allowed values.
# NOTE(review): both 'Non-Profit' and 'Non-profit' are listed for
# "Legal Establishment" - presumably both spellings occur in the CSV; confirm.
allowed_values_by_column = {
    "Legal Establishment": [
        'Non-Profit',
        'City',
        'County',
        'Library District',
        'City-County',
        'Multi-jurisdictional',
        'Non-profit',
        'Other',
    ],
    "Region": [
        'Alamo Area Region',
        'Big Country Region',
        'Central Texas Region',
        'Houston Area Region',
        'North Texas Region',
        'Northeast Texas Region',
        'South Texas Region',
        'Texas Panhandle Region',
        'Texas Trans-Pecos Region',
        'West Texas Region',
    ],
}

for column_name, allowed_values in allowed_values_by_column.items():
    TX_Lib.schema.get_field(column_name).constraints["enum"] = allowed_values

pp.pprint(TX_Lib)

{'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_libraries',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Libraries/TX-County_Libraries.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'description': 'Name of libraries in Texas',
                        'name': 'Library Name',
                        'title': 'Library Name',
                        'type': 'string'},
                       {'description': 'Population of the legal service area '
                                       'associated with the relevant library '
                                       'and county',
                        'name': 'Population of the Legal Service Area',
                        'title': 'Population of the Legal Service Area

In [7]:
# Give the resource as a whole a free-text description, then display it.
TX_Lib.description = (
    "This dataset contains the title, population of service area, "
    "legal establishment type, county, and region of libraries in Texas"
)
pp.pprint(TX_Lib)

{'description': 'This dataset contains the title, population of service area, '
                'legal establishment type, county, and region of libraries in '
                'Texas',
 'encoding': 'utf-8',
 'format': 'csv',
 'hashing': 'md5',
 'name': 'tx-county_libraries',
 'path': 'https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Libraries/TX-County_Libraries.csv',
 'profile': 'tabular-data-resource',
 'schema': {'fields': [{'description': 'Texas county name',
                        'name': 'County',
                        'title': 'County',
                        'type': 'string'},
                       {'description': 'Name of libraries in Texas',
                        'name': 'Library Name',
                        'title': 'Library Name',
                        'type': 'string'},
                       {'description': 'Population of the legal service area '
                                       'associated with the relevant library '
          

In [8]:
# Write the resource descriptor to tableschema.json (relative to the working
# directory). The call also returns the JSON text, which is what the cell
# displays as its output.
TX_Lib.to_json("tableschema.json")

'{\n  "path": "https://raw.githubusercontent.com/unt-libraries/portal-leading/main/data/TX_Libraries/TX-County_Libraries.csv",\n  "name": "tx-county_libraries",\n  "profile": "tabular-data-resource",\n  "scheme": "https",\n  "format": "csv",\n  "hashing": "md5",\n  "encoding": "utf-8",\n  "schema": {\n    "fields": [\n      {\n        "name": "County",\n        "type": "string",\n        "title": "County",\n        "description": "Texas county name"\n      },\n      {\n        "name": "Library Name",\n        "type": "string",\n        "title": "Library Name",\n        "description": "Name of libraries in Texas"\n      },\n      {\n        "name": "Population of the Legal Service Area",\n        "type": "string",\n        "title": "Population of the Legal Service Area",\n        "description": "Population of the legal service area associated with the relevant library and county"\n      },\n      {\n        "name": "Legal Establishment",\n        "type": "string",\n        "title": "Leg

In [25]:
# List the working directory to confirm that tableschema.json was written.
# The explicit %ls magic is used so the cell does not depend on IPython's
# automagic being enabled or on no variable named `ls` existing.
%ls

 Volume in drive C has no label.
 Volume Serial Number is 3A43-426E

 Directory of C:\Users\Dream Machine\Desktop\LEADING20211018\TX_Digital-Newspaper-Program_Issues

11/03/2021  06:37 PM    <DIR>          .
11/03/2021  06:37 PM    <DIR>          ..
11/03/2021  06:26 PM    <DIR>          .ipynb_checkpoints
11/03/2021  06:37 PM            20,942 Frictionless-App-Script_TX-Digital-Newspaper-Program_Issues.ipynb
11/03/2021  06:38 PM             1,879 tableschema.json
               2 File(s)         22,821 bytes
               3 Dir(s)  50,568,605,696 bytes free
