In [1]:
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta

In [2]:
# Initialize Faker (Indonesian locale) and seed every random source used by
# the notebook, so that Restart & Run All regenerates the same dataset.
SEED = 42
fake = Faker('id_ID')
Faker.seed(SEED)   # class-level call: seeds the generator shared by Faker instances
random.seed(SEED)  # stdlib generator used directly by the generate_* helpers

In [43]:
# Generate dummy data for Users
def generate_users(num_users):
    """Build ``num_users`` fake user records with unique usernames.

    Relies on the module-level ``fake`` Faker instance and on the stdlib
    ``random`` module.

    Args:
        num_users: Number of user records to create.

    Returns:
        A list of dicts with keys ``User_id`` (1-based, sequential), ``Name``,
        ``Username``, ``Email`` and ``Phone_number``.
    """
    users = []
    generated_usernames = set()  # Usernames handed out so far

    for user_id in range(1, num_users + 1):
        # Strip dots and commas but keep the casing Faker produced.
        # (str.capitalize() would lowercase everything after the first
        # character, turning "Budi Santoso" into "Budi santoso".)
        name = fake.name().replace('.', '').replace(',', '')

        # Username stem: a random word of the name longer than two characters,
        # falling back to the whole name when no such word exists.
        words = [word for word in name.split() if len(word) > 2]
        selected_word = random.choice(words) if words else name
        username = selected_word.lower().replace(' ', '_')

        # Keep appending random digits until the username is unused
        while username in generated_usernames:
            username += str(random.randint(1, 999))
        generated_usernames.add(username)

        users.append({
            'User_id': user_id,
            'Name': name,
            'Username': username,
            # The email reuses the (already unique) username as its local part
            'Email': f"{username}@library.com",
            'Phone_number': fake.phone_number(),
        })

    return users

# Generate dummy data for Libraries
def generate_libraries(num_libraries):
    """Return ``num_libraries`` fake library records.

    Every record carries a 1-based ``Library_id`` plus a ``Name`` and an
    ``Address`` produced by the module-level ``fake`` Faker instance.
    """
    return [
        {
            'Library_id': library_id,
            'Name': fake.company(),
            'Address': fake.address(),
        }
        for library_id in range(1, num_libraries + 1)
    ]

# Generate dummy data for Books
def generate_books(num_books):
    """Return ``num_books`` fake book records.

    Each record has a 1-based ``Book_id``, a ``Title`` taken from Faker's
    ``catch_phrase``, an ``Author`` name and an ``ISBN`` (ISBN-13), all drawn
    from the module-level ``fake`` instance.
    """
    return [
        {
            'Book_id': book_id,
            'Title': fake.catch_phrase(),  # catch phrases stand in for titles
            'Author': fake.name(),
            'ISBN': fake.isbn13(),
        }
        for book_id in range(1, num_books + 1)
    ]

# Generate dummy data for Book_availabilities
def generate_book_availabilities(books, libraries):
    """Create exactly one availability record per (book, library) pair.

    Every book is stocked in every library with a random ``Quantity_owned``
    between 1 and 6.  Each (Book_id, Library_id) pair appears only once: the
    stock bookkeeping in ``generate_returns`` updates just the first row that
    matches a pair, so duplicate rows would make the counts drift apart.
    Because the cross product already covers every pair, no supplementary
    random rows are added.

    Args:
        books: Iterable of dicts containing a ``Book_id`` key.
        libraries: Iterable of dicts containing a ``Library_id`` key.

    Returns:
        A list of dicts with keys ``Availability_id`` (1-based, sequential),
        ``Book_id``, ``Library_id`` and ``Quantity_owned``.
    """
    book_availabilities = []
    used_pairs = set()  # (Book_id, Library_id) pairs already emitted

    for book in books:
        for library in libraries:
            pair = (book['Book_id'], library['Library_id'])
            if pair in used_pairs:
                continue  # Repeated id in the input; keep the pair unique
            used_pairs.add(pair)

            book_availabilities.append({
                'Availability_id': len(book_availabilities) + 1,
                'Book_id': pair[0],
                'Library_id': pair[1],
                'Quantity_owned': random.randint(1, 6),
            })

    return book_availabilities

# Generate dummy data for Borrows ensuring Quantity_owned is updated
def generate_borrows(num_borrows, users, books, libraries, book_availabilities):
    """Simulate up to ``num_borrows`` loans and return them as a list of dicts.

    A user may hold at most two loans and never borrows the same ``Book_id``
    twice.  Borrow dates start from 2023-01-01 and advance by a random 1-5
    days per loan; each loan is due 14 days after its borrow date.

    Side effect: ``Quantity_owned`` of the chosen availability row is
    decremented in place for every loan.

    Args:
        num_borrows: Upper bound on the number of loans to create.
        users: List of dicts containing a ``User_id`` key.
        books: Unused; kept so existing callers keep working.
        libraries: Unused; kept so existing callers keep working.
        book_availabilities: List of dicts with ``Book_id``, ``Library_id``
            and ``Quantity_owned`` keys; mutated in place.

    Returns:
        A list of dicts with keys ``Borrow_id`` (1-based, contiguous),
        ``User_id``, ``Book_id``, ``Library_id``, ``Borrow_date`` and
        ``Due_date``.  It may be shorter than ``num_borrows`` when every user
        has reached the limit or nothing is left in stock.
    """
    max_loans_per_user = 2
    loan_period = timedelta(days=14)

    borrows = []
    # Book_ids each user has borrowed.  This is keyed by User_id, so Book_ids
    # must only ever be tested against the inner sets, never against the dict.
    borrowed_titles = {user['User_id']: set() for user in users}

    borrow_date = datetime(2023, 1, 1)  # Starting borrow date

    for _ in range(num_borrows):
        # Draw only from users below the limit so no iteration is wasted and
        # Borrow_id stays contiguous.
        eligible_users = [
            user for user in users
            if len(borrowed_titles[user['User_id']]) < max_loans_per_user
        ]
        if not eligible_users:
            break  # Every user has reached the borrow limit

        in_stock = [ba for ba in book_availabilities if ba['Quantity_owned'] > 0]
        if not in_stock:
            break  # Nothing left to lend anywhere

        user_id = random.choice(eligible_users)['User_id']
        candidates = [
            ba for ba in in_stock
            if ba['Book_id'] not in borrowed_titles[user_id]
        ]
        if not candidates:
            continue  # This user already holds every title still in stock

        book_availability = random.choice(candidates)

        # Randomly advance the borrow date by 1 to 5 days
        borrow_date += timedelta(days=random.randint(1, 5))

        borrows.append({
            'Borrow_id': len(borrows) + 1,
            'User_id': user_id,
            'Book_id': book_availability['Book_id'],
            'Library_id': book_availability['Library_id'],
            'Borrow_date': borrow_date,
            'Due_date': borrow_date + loan_period,
        })

        borrowed_titles[user_id].add(book_availability['Book_id'])
        book_availability['Quantity_owned'] -= 1  # One copy leaves the shelf

    return borrows

# Generate dummy data for Returns based on Borrows
def generate_returns(borrows, book_availabilities):
    """Produce return records for a random subset of ``borrows``.

    Each borrow is returned or not according to a ``random.choice`` between
    True and False.  A returned borrow gets a ``Return_date`` drawn by the
    module-level ``fake`` instance between its ``Borrow_date`` and
    ``Due_date``.

    Side effect: for every return, ``Quantity_owned`` of the first row in
    ``book_availabilities`` matching the borrow's book and library is
    incremented in place.

    Returns:
        A list of dicts with keys ``Return_id`` (1-based, sequential),
        ``Borrow_id`` and ``Return_date``.
    """
    returns = []

    for borrow in borrows:
        was_returned = random.choice([True, False])
        if not was_returned:
            continue

        return_date = fake.date_between(
            start_date=borrow['Borrow_date'],
            end_date=borrow['Due_date'],
        )
        returns.append({
            'Return_id': len(returns) + 1,
            'Borrow_id': borrow['Borrow_id'],
            'Return_date': return_date,
        })

        # Put the copy back on the shelf: only the first matching row is updated
        stock_row = next(
            (
                ba for ba in book_availabilities
                if ba['Book_id'] == borrow['Book_id']
                and ba['Library_id'] == borrow['Library_id']
            ),
            None,
        )
        if stock_row is not None:
            stock_row['Quantity_owned'] += 1

    return returns

In [44]:
# Generate data
# Table sizes (num_borrows is passed to generate_borrows as its loop bound)
num_users, num_libraries, num_books, num_borrows = 100, 4, 200, 2000

# Build the tables in dependency order: availabilities need books and
# libraries, borrows need users and availabilities, returns need borrows.
users = generate_users(num_users=num_users)
libraries = generate_libraries(num_libraries=num_libraries)
books = generate_books(num_books=num_books)
book_availabilities = generate_book_availabilities(books=books, libraries=libraries)
borrows = generate_borrows(
    num_borrows=num_borrows,
    users=users,
    books=books,
    libraries=libraries,
    book_availabilities=book_availabilities,
)
returns = generate_returns(borrows=borrows, book_availabilities=book_availabilities)

In [45]:
# Convert data to DataFrames (one frame per generated table, same order)
(
    df_users,
    df_libraries,
    df_books,
    df_book_availability,
    df_borrows,
    df_returns,
) = map(
    pd.DataFrame,
    (users, libraries, books, book_availabilities, borrows, returns),
)


In [46]:
# Save DataFrames to CSV files (written to the current directory, no index column)
for csv_name, frame in (
    ('users.csv', df_users),
    ('libraries.csv', df_libraries),
    ('books.csv', df_books),
    ('book_availability.csv', df_book_availability),
    ('borrows.csv', df_borrows),
    ('returns.csv', df_returns),
):
    frame.to_csv(csv_name, index=False)
