# create database from scratch

In [19]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from faker import Faker
from datetime import datetime, timedelta
import os
import random

# the SQLite file is created in the current working directory: ./database.db
base_dir = os.getcwd()
db_path = os.path.join(base_dir, 'database.db')

# shared objects: en_AU fake-data generator, ORM handle, Flask application
faker = Faker('en_AU')
db = SQLAlchemy()
app = Flask(__name__)

# point SQLAlchemy at the SQLite file and switch off modification tracking
app.config.update(
    SQLALCHEMY_DATABASE_URI=f'sqlite:///{db_path}',
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
)

# bind the ORM handle to the application
db.init_app(app)

In [20]:
# 3 tables: student, supervisor, partner
class Student(db.Model):
    """Student account: login credentials, profile, and academic background."""
    __tablename__ = 'student'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # identity and login; username and email must each be unique
    student_id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String, nullable=False, unique=True)
    first_name = db.Column(db.String, nullable=False)
    last_name = db.Column(db.String, nullable=False)
    email = db.Column(db.String, nullable=False, unique=True)
    # NOTE(review): plain String column — confirm a hash, not the raw password, is stored
    password = db.Column(db.String, nullable=False)
    avatar = db.Column(db.String, nullable=False)

    # academic background; every field is required
    qualification = db.Column(db.String, nullable=False)
    school_name = db.Column(db.String, nullable=False)
    major = db.Column(db.String, nullable=False)
    skills = db.Column(db.String, nullable=False)
    strength = db.Column(db.String, nullable=False)

    # NOTE(review): flagged "!!!!" by the original author. The column is required
    # and unique, but the student seed rows generated in this file carry neither
    # avatar nor resume_url — confirm how these values are populated on load.
    resume_url = db.Column(db.String, nullable=False, unique=True)


class Supervisor(db.Model):
    """Supervisor account: login credentials, profile, and academic background."""
    __tablename__ = 'supervisor'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # identity and login; username and email must each be unique
    supervisor_id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String, nullable=False, unique=True)
    first_name = db.Column(db.String, nullable=False)
    last_name = db.Column(db.String, nullable=False)
    email = db.Column(db.String, nullable=False, unique=True)
    # NOTE(review): plain String column — confirm a hash, not the raw password, is stored
    password = db.Column(db.String, nullable=False)
    avatar = db.Column(db.String, nullable=False)

    # academic background; every field is required
    qualification = db.Column(db.String, nullable=False)
    school_name = db.Column(db.String, nullable=False)
    skills = db.Column(db.String, nullable=False)


class Partner(db.Model):
    """Industry partner account: login credentials, profile, and company details."""
    __tablename__ = 'partner'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # identity and login; username and email must each be unique
    partner_id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String, nullable=False, unique=True)
    first_name = db.Column(db.String, nullable=False)
    last_name = db.Column(db.String, nullable=False)
    email = db.Column(db.String, nullable=False, unique=True)
    # NOTE(review): plain String column — confirm a hash, not the raw password, is stored
    password = db.Column(db.String, nullable=False)
    avatar = db.Column(db.String, nullable=False)

    # company the partner belongs to, their position there, and a free-text description
    company = db.Column(db.String, nullable=False)
    position = db.Column(db.String, nullable=False)
    description = db.Column(db.String, nullable=False)

################################################################

# the partner creates the project
class Project(db.Model):
    """A project posted by a partner, later linked to a supervisor and a student."""
    __tablename__ = 'project'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # each partner may have multiple projects
    project_id = db.Column(db.Integer, primary_key=True)
    partner_id = db.Column(db.Integer, db.ForeignKey('partner.partner_id'), nullable=False)

    title = db.Column(db.String, nullable=False)
    description = db.Column(db.String, nullable=False)

    # the project detail
    problem_statement = db.Column(db.String, nullable=False)
    desired_outcomes = db.Column(db.String, nullable=False)
    required_skills = db.Column(db.String, nullable=False)
    deliverables = db.Column(db.String, nullable=False)

    # the requirements for students
    requirements = db.Column(db.String, nullable=False)

    # timestamp: set on insert and refreshed on every update of the row
    # (datetime.now yields naive local time)
    last_updated_at = db.Column(db.DateTime, nullable=False, default=datetime.now, onupdate=datetime.now)

    # the partner decides the supervisor id,
    # then the supervisor decides the student id;
    # both values are NULL at the beginning
    supervisor_id = db.Column(db.Integer, db.ForeignKey('supervisor.supervisor_id'), nullable=True)
    student_id = db.Column(db.Integer, db.ForeignKey('student.student_id'), nullable=True)

    # project status flag: is_open, in_progress, or closed.
    # The enum type is given an explicit name because backends that create a
    # real ENUM type (e.g. PostgreSQL) require one; SQLite is unaffected.
    status = db.Column(
        db.Enum('is_open', 'in_progress', 'closed', name='project_status'),
        nullable=False,
    )

# a student expresses interest in a project and attaches a short reason
class StudentInterestExpress(db.Model):
    """One expression of interest from a student in a project."""
    __tablename__ = 'student_interest_express'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # each student may express interest in multiple projects (one row per interest)
    # NOTE(review): no unique constraint on (student_id, project_id) is declared
    # here, so duplicate rows are possible — confirm whether that is intended
    student_interest_express_id = db.Column(db.Integer, primary_key=True)
    student_id = db.Column(db.Integer, db.ForeignKey('student.student_id'), nullable=False)
    project_id = db.Column(db.Integer, db.ForeignKey('project.project_id'), nullable=False)
    reason = db.Column(db.String, nullable=False)
    # set on insert and refreshed on every update of the row (naive local time)
    last_updated_at = db.Column(db.DateTime, nullable=False, default=datetime.now, onupdate=datetime.now)

# the supervisor also needs to express interest in order to link a project with
# a student, and likewise attaches a short reason
class SupervisorInterestExpress(db.Model):
    """One expression of interest from a supervisor in a project."""
    __tablename__ = 'supervisor_interest_express'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # each supervisor may express interest in multiple projects (one row per interest)
    # NOTE(review): no unique constraint on (supervisor_id, project_id) is declared
    # here, so duplicate rows are possible — confirm whether that is intended
    supervisor_interest_express_id = db.Column(db.Integer, primary_key=True)
    supervisor_id = db.Column(db.Integer, db.ForeignKey('supervisor.supervisor_id'), nullable=False)
    project_id = db.Column(db.Integer, db.ForeignKey('project.project_id'), nullable=False)
    reason = db.Column(db.String, nullable=False)
    # set on insert and refreshed on every update of the row (naive local time)
    last_updated_at = db.Column(db.DateTime, nullable=False, default=datetime.now, onupdate=datetime.now)

# a student reports progress on a project; each report is recorded with a
# timestamp and content, and is commented on by the partner only
class ProjectProgress(db.Model):
    """One progress report on a project, together with the partner's feedback."""
    __tablename__ = 'project_progress'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # each student may file multiple progress reports for a project
    project_progress_id = db.Column(db.Integer, primary_key=True)
    project_id = db.Column(db.Integer, db.ForeignKey('project.project_id'), nullable=False)
    student_id = db.Column(db.Integer, db.ForeignKey('student.student_id'), nullable=False)

    # the student uploads some text content and, optionally, a file URL
    # (file_url may be NULL)
    content = db.Column(db.String, nullable=False)
    file_url = db.Column(db.String, nullable=True)
    # NOTE(review): onupdate applies to any UPDATE of the row that does not set
    # this column explicitly, so a partner-side edit would presumably refresh
    # this timestamp as well — confirm that is acceptable
    student_last_updated_at = db.Column(db.DateTime, nullable=False, default=datetime.now, onupdate=datetime.now)

    # the partner provides a comment only
    # NOTE(review): partner_id and partner_feedback are NOT NULL, so a report
    # cannot be inserted before feedback exists unless a placeholder value is
    # supplied — confirm this is intended
    partner_id = db.Column(db.Integer, db.ForeignKey('partner.partner_id'), nullable=False)
    partner_feedback = db.Column(db.String, nullable=False)
    # NOTE(review): same onupdate caveat as student_last_updated_at — a
    # student-side edit would presumably refresh this timestamp too
    partner_last_updated_at = db.Column(db.DateTime, nullable=False, default=datetime.now, onupdate=datetime.now)


# the supervisor can provide feedback on a project at any time
class ProjectSupervisorFeedback(db.Model):
    """One piece of feedback from a supervisor on a project."""
    __tablename__ = 'project_supervisor_feedback'
    # extend_existing lets this table be declared again on metadata that already
    # contains it (presumably so the notebook cell can be re-run) — TODO confirm
    __table_args__ = {"extend_existing": True}

    # each supervisor may leave multiple feedback entries for a project
    project_supervisor_feedback_id = db.Column(db.Integer, primary_key=True)
    project_id = db.Column(db.Integer, db.ForeignKey('project.project_id'), nullable=False)
    supervisor_id = db.Column(db.Integer, db.ForeignKey('supervisor.supervisor_id'), nullable=False)

    feedback = db.Column(db.String, nullable=False)
    # set on insert and refreshed on every update of the row (naive local time)
    supervisor_last_updated_at = db.Column(db.DateTime, nullable=False, default=datetime.now, onupdate=datetime.now)

In [21]:
# Rebuild the schema from scratch inside an application context: drop every
# table known to `db`, then create them again from the model classes.
# WARNING: any rows already stored in those tables are lost.
with app.app_context():
    db.drop_all()
    db.create_all()

## Generate fake data for the database, save to ./raw_data

In [22]:
# Create fake student data and save it to ./raw_data/student.txt,
# one pipe-delimited record per line:
# username|first_name|last_name|email|password|qualification|school_name|major|skills|strength
qualifications = [
    'Bachelor of Science', 'Bachelor of Arts', 'Master of Science', 'Master of Business Administration',
    'Bachelor of Commerce', 'PhD in Computer Science', 'Bachelor of Engineering', 'Master of Arts',
    'Bachelor of Information Technology', 'Master of Information Technology', 'Doctor of Philosophy in Arts',
    'Bachelor of Education'
]

school_names = [
    'University of Sydney', 'University of Melbourne', 'University of Queensland', 'Australian National University',
    'University of Western Australia', 'University of Adelaide', 'Monash University', 'University of New South Wales',
    'University of Canberra', 'University of Tasmania', 'James Cook University', 'Macquarie University'
]

majors = [
    'Computer Science', 'Business Administration', 'Biology', 'Mathematics', 'Chemistry', 'Physics',
    'History', 'Literature', 'Psychology', 'Philosophy', 'Education', 'Civil Engineering', 'Electrical Engineering',
    'Mechanical Engineering'
]

skills_list = [
    'Problem solving', 'Data analysis', 'Teamwork', 'Software development', 'Project management', 'Leadership',
    'Statistical analysis', 'Public speaking', 'Digital marketing', 'UX/UI design', 'Database management',
    'Graphic design', 'Networking', 'Research'
]

strengths_list = [
    'Strong analytical capabilities', 'Adept at multitasking', 'Highly communicative and team-oriented',
    'Profound coding abilities', 'Solid management skills', 'Exceptional problem-solving aptitude',
    'Detail-oriented', 'Resourceful and creative', 'Versatile skillset', 'Quick learner', 'Adaptable to change',
    'Goal-driven'
]

students = []
NUM_STUDENTS = 20

for i in range(NUM_STUDENTS):
    # faker.unique never repeats a value, so every first/last name is distinct
    first_name = faker.unique.first_name()
    last_name = faker.unique.last_name()

    # username and email are derived from the (unique) name; every account
    # shares the same password
    # NOTE(review): names are lower-cased verbatim, so an apostrophe or space in
    # a generated name would end up in the username/email — confirm acceptable.
    # NOTE(review): the email local part uses '-' here but '.' for supervisors
    # and partners — confirm intended.
    username = f"{first_name.lower()}-{last_name.lower()}"
    email = f"{first_name.lower()}-{last_name.lower()}@student.edu.au"
    password = "Abcd1234!"

    # random qualification, school_name, major, three distinct skills, strength
    qualification = random.choice(qualifications)
    school_name = random.choice(school_names)
    major = random.choice(majors)
    skills = ", ".join(random.sample(skills_list, 3))
    strength = random.choice(strengths_list)

    students.append("|".join((
        username, first_name, last_name, email, password,
        qualification, school_name, major, skills, strength,
    )))

# make sure the output directory exists; open() would otherwise raise
# FileNotFoundError on a fresh checkout
os.makedirs('./raw_data', exist_ok=True)

# write the records to ./raw_data/student.txt
with open('./raw_data/student.txt', 'w') as f:
    for student in students:
        f.write(student + '\n')

In [23]:
# Create fake supervisor data and save it to ./raw_data/supervisor.txt,
# one pipe-delimited record per line:
# username|first_name|last_name|email|password|qualification|school_name|skills

# 10 supervisors are enough
NUM_SUPERVISORS = 10

# Lists for realistic values
qualifications_supervisor = [
    'PhD in Computer Science', 'PhD in Business Administration', 'Master of Business Administration',
    'Master of Science', 'Master of Arts', 'Doctor of Philosophy in Arts',
    'Bachelor of Engineering', 'Master of Information Technology',
    'PhD in Electrical Engineering', 'PhD in Mechanical Engineering', 'Doctorate in Psychology'
]

supervisors = []

for i in range(NUM_SUPERVISORS):
    # faker.unique never repeats a value, so every first/last name is distinct
    first_name = faker.unique.first_name()
    last_name = faker.unique.last_name()

    # username and email are derived from the (unique) name; every account
    # shares the same password
    username = f"{first_name.lower()}-{last_name.lower()}"
    email = f"{first_name.lower()}.{last_name.lower()}@school.edu.au"
    password = "Abcd1234!"

    # qualification comes from the supervisor-specific list; school_names and
    # skills_list are the lists already defined for the student data
    qualification = random.choice(qualifications_supervisor)
    school_name = random.choice(school_names)
    skills = ", ".join(random.sample(skills_list, 4))

    supervisors.append("|".join((
        username, first_name, last_name, email, password,
        qualification, school_name, skills,
    )))

# make sure the output directory exists; open() would otherwise raise
# FileNotFoundError on a fresh checkout
os.makedirs('./raw_data', exist_ok=True)

# write the records to ./raw_data/supervisor.txt
with open('./raw_data/supervisor.txt', 'w') as f:
    for supervisor in supervisors:
        f.write(supervisor + '\n')

In [None]:
# Create fake partner data and save it to ./raw_data/partner.txt,
# one pipe-delimited record per line:
# username|first_name|last_name|email|password|company|position|description

# Lists for position and description
positions = [
    "CEO", "CTO", "COO", "Chief Officer",
    "Director of Operations", "Director of Sales",
    "HR", "Head of HR"
]

descriptions = [
    "Over 20 years of experience leading technology teams.",
    "Proven track record in scaling startups to multi-million user bases.",
    "Expertise in global finance, managing budgets exceeding $100 million.",
    "Passionate about innovative marketing strategies and their execution in the global market.",
    "Directed multiple successful product launches, resulting in a 300% growth in sales.",
    "Broad experience in setting company vision, strategy, and successfully executing it.",
    "Adept at building, maintaining, and nurturing high-value customer relationships.",
    "Significant expertise in business transformation and change management across diverse industries.",
    "Strong background in data-driven decision making, leveraging cutting-edge analytics tools.",
    "Passionate about fostering team growth, development, and ensuring optimal productivity.",
    "Has spearheaded numerous green initiatives, pushing for sustainability in the industry.",
    "Holds a successful record in mergers and acquisitions, optimizing business value.",
    "Expert in crafting and implementing efficient operational processes, reducing overheads by 40%.",
    "Pioneered the adoption of AI and ML techniques for business optimization in the company.",
    "Deep understanding of the supply chain, having improved logistics efficiency by 25%.",
    "Championed company culture, employee satisfaction, and reduced attrition rates.",
    "Frequent speaker at international conferences, representing the company's vision and achievements.",
    "Skilled in negotiation, having secured valuable partnerships and contracts.",
    "Strong advocate for diversity and inclusion, implementing company-wide programs.",
    "Extensive background in R&D, driving innovation and patenting new technologies.",
    "Played a pivotal role in expanding the company's footprint in Asia and Europe.",
    "Known for a hands-on leadership style, directly contributing to project successes.",
    "Guided by a strong ethical compass, prioritizing transparency and accountability.",
    "Consistently prioritizes customer feedback, leading to better product iterations.",
    "Highly skilled in crisis management, steering the company through multiple challenging periods."
]

NUM_PARTNERS = 20
partners = []

for i in range(NUM_PARTNERS):
    # faker.unique never repeats a value, so every first/last name is distinct
    first_name = faker.unique.first_name()
    last_name = faker.unique.last_name()

    # username and email are derived from the (unique) name; every account
    # shares the same password
    username = f"{first_name.lower()}-{last_name.lower()}"
    email = f"{first_name.lower()}.{last_name.lower()}@business.au"
    password = "Abcd1234!"

    # company (not forced to be unique), position, description
    company = faker.company()
    position = random.choice(positions)
    description = random.choice(descriptions)

    partners.append("|".join((
        username, first_name, last_name, email, password,
        company, position, description,
    )))

# make sure the output directory exists; open() would otherwise raise
# FileNotFoundError on a fresh checkout
os.makedirs('./raw_data', exist_ok=True)

# write the records to ./raw_data/partner.txt
with open('./raw_data/partner.txt', 'w') as f:
    for partner in partners:
        f.write(partner + '\n')