# Movie Recommendation System

In [None]:
# Columns now represent [Likes Action, Likes Comedy, Likes Drama]
user_preferences_expanded = np.array([
    [1, 1, 0],  # U1's known preferences
    [0, 1, 1],  # U2's preferences
    [1, 1, 0],  # U3's preferences
    [1, 0, 1],  # U4's preferences
    [0, 1, 0]   # U5's preferences
])

# Expanded movie genres for new predictions
# Movies: M3 (Drama), M4 (Action), M6 (Action & Drama)
# Columns: [Is Action, Is Comedy, Is Drama]
movie_genres_expanded = np.array([
    [0, 0, 1],  # M3's genres
    [1, 0, 0],  # M4's genres
    [1, 0, 1]   # M6's genres (Action & Drama)
])

In [None]:
# Function to predict preferences for the expanded dataset
def predict_preferences_expanded(user_preferences, movie_genres):
    predictions = []
    # Iterate over each movie genre combination
    for movie_genre in movie_genres:
        p_likes = []
        for genre_index, has_genre in enumerate(movie_genre):
            if has_genre:
                # Filter preferences and labels for the current genre
                relevant_preferences = user_preferences[:, genre_index]
                relevant_labels = relevant_preferences[1:]  # Exclude U1

                # Calculate probabilities for liking each genre
                p_like = np.mean(relevant_labels == 1)
                p_dislike = np.mean(relevant_labels == 0)

                # Conditional independence assumption here
                p_like_given_features = p_like * np.mean(relevant_preferences[0] == 1)
                p_dislike_given_features = p_dislike * np.mean(relevant_preferences[0] == 0)

                # Normalize probabilities
                p_sum = p_like_given_features + p_dislike_given_features
                p_like_given_features /= p_sum
                p_dislike_given_features /= p_sum

                p_likes.append(p_like_given_features)

        # Aggregate probabilities across genres for each movie
        # Here we simply average the probabilities for simplicity
        avg_p_like = np.mean(p_likes)
        prediction = 1 if avg_p_like > 0.5 else 0
        predictions.append(prediction)

    return predictions

In [None]:
# Predict U1's preferences for the new movies (M3, M4, M6)
predictions_expanded = predict_preferences_expanded(user_preferences_expanded, movie_genres_expanded)
predictions_expanded

[0, 1, 0]

#Without data augmentation

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Sample dataset
texts = ["I love this product", "Worst purchase ever", "Will buy again", "Not worth the money", "Highly recommend"]
labels = [1, 0, 1, 0, 1]  # 1: Positive, 0: Negative

# Preprocessing and feature extraction
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)
y = labels

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = MultinomialNB()
model.fit(X_train, y_train)

# Prediction and evaluation
predictions = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))


Accuracy: 0.0


#Using data augmentation

In [None]:
import nltk
from nltk.corpus import wordnet
from random import choice
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Ensure you have these NLTK datasets downloaded
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('punkt')

def get_synonyms(word):
    """Get synonyms of a word."""
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return list(synonyms)

def replace_synonyms(sentence):
    """Replace random words in the sentence with their synonyms."""
    words = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(words)

    new_words = []
    for word, tag in pos_tags:
        synonyms = get_synonyms(word)
        if synonyms and word.lower() not in ['is', 'was', 'were', 'am', 'are']:  # Avoid replacing auxiliary verbs
            new_word = choice(synonyms)
            new_words.append(new_word)
        else:
            new_words.append(word)

    return ' '.join(new_words)

def augment_data(sentences, labels, augment_factor=1):
    """Augment the dataset by replacing synonyms.

    Augment Factor = N: For each original sentence, N augmented versions are created.
    The total dataset size becomes (N+1) times the original size.
    For 10 original sentences, you would get (N+1) * 10 sentences in total.

    """
    augmented_sentences = []
    augmented_labels = []
    for sentence, label in zip(sentences, labels):
        augmented_sentences.append(sentence)  # Original sentence
        augmented_labels.append(label)
        for _ in range(augment_factor):
            augmented_sentence = replace_synonyms(sentence)
            augmented_sentences.append(augmented_sentence)
            augmented_labels.append(label)
    return augmented_sentences, augmented_labels

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Expanded dataset
texts = [
    "I love this product",
    "Worst purchase ever",
    "Will buy again",
    "Not worth the money",
    "Highly recommend",
    "Completely satisfied",
    "Terrible customer service",
    "Fantastic quality",
    "Not as described",
    "Exceeded my expectations"
]
labels = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1]  # 1: Positive, 0: Negative

# Augment the dataset
augmented_texts, augmented_labels = augment_data(texts, labels, augment_factor=2)
print(augmented_texts)

['I love this product', 'iodin do_it this product', 'iodine honey this ware', 'Worst purchase ever', "mop_up leverage e'er", "rack_up buy e'er", 'Will buy again', 'will corrupt over_again', 'bequeath steal once_more', 'Not worth the money', 'not Worth the money', 'not worth the money', 'Highly recommend', 'extremely recommend', 'highly recommend', 'Completely satisfied', 'wholly fulfil', 'entirely meet', 'Terrible customer service', 'awful client serving', 'horrific customer table_service', 'Fantastic quality', 'wondrous prime', 'grand select', 'Not as described', 'non American_Samoa line', 'not group_A describe', 'Exceeded my expectations', 'top my expectation', 'surpass my outlook']


In [None]:
# Preprocessing and feature extraction
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(augmented_texts)
y = augmented_labels

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = MultinomialNB()
model.fit(X_train, y_train)

# Prediction and evaluation
predictions = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 1.0


#Without using inbuilt libraries

In [None]:
import numpy as np  # Import the numpy library for numerical operations
import nltk  # Import the Natural Language Toolkit for NLP tasks
from nltk.corpus import wordnet  # Import the wordnet corpus from NLTK for lexical database
from random import choice  # Import choice to select random elements
from collections import Counter  # Import Counter to count hashable objects

# Download necessary NLTK datasets for processing
nltk.download('averaged_perceptron_tagger')  # POS tagger
nltk.download('wordnet')  # Lexical database for the English language
nltk.download('punkt')  # Punkt Tokenizer Model

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
def tokenize(text):
    """Tokenize the input text and convert to lower case."""
    return nltk.word_tokenize(text.lower())

def get_vocabulary(texts):
    """Build and return a set of unique tokens from a list of texts."""
    all_tokens = [token for text in texts for token in tokenize(text)]
    vocabulary = set(all_tokens)
    return vocabulary

def texts_to_bow(texts, vocabulary):
    """Convert a list of texts to a bag-of-words numpy array."""
    vocab_list = list(vocabulary)  # Convert the set to a list for indexing
    bow_array = np.zeros((len(texts), len(vocabulary)), dtype=int)  # Initialize BOW array

    for i, text in enumerate(texts):  # Iterate over texts
        tokens = tokenize(text)  # Tokenize the current text
        token_counts = Counter(tokens)  # Count occurrences of each token
        for token, count in token_counts.items():  # Iterate over token counts
            if token in vocabulary:  # Check if token is in the vocabulary
                bow_array[i, vocab_list.index(token)] = count  # Update BOW array

    return bow_array

In [None]:
class NaiveBayesClassifier:
    def __init__(self):
        """Initialize the classifier with empty log-prior and log-likelihood."""
        self.log_prior = None
        self.log_likelihood = None
        self.vocabulary = None

    def fit(self, X, y):
        """Train the classifier with feature array X and target array y."""
        m, n = X.shape  # Number of samples and features
        self.vocabulary = n  # Set the vocabulary size

        # Calculate log prior for each class
        n_classes = np.unique(y)  # Unique classes
        self.log_prior = np.log(np.array([np.sum(y == c) for c in n_classes]) / m)

        # Calculate log likelihood for each word/class combination
        self.log_likelihood = np.zeros((len(n_classes), n))
        for c in n_classes:
            class_mask = y == c  # Create a mask for each class
            class_count = np.sum(class_mask)  # Count samples in the class
            word_count = np.sum(X[class_mask], axis=0)  # Sum word counts for the class
            self.log_likelihood[c] = np.log((word_count + 1) / (np.sum(word_count) + n))  # Laplace smoothing

    def predict(self, X):
        """Predict the class labels for the input array X."""
        m = X.shape[0]  # Number of samples to predict
        n_classes = len(self.log_prior)  # Number of classes
        predictions = np.zeros(m)  # Initialize predictions array

        for i in range(m):  # Iterate over samples
            log_probs = self.log_likelihood @ X[i].T + self.log_prior  # Calculate log probabilities
            predictions[i] = np.argmax(log_probs)  # Choose the class with the highest probability

        return predictions

In [None]:
# Sample original dataset expanded for augmentation
original_texts = [
    "I love this product", "Worst purchase ever", "Will buy again", "Not worth the money", "Highly recommend",
    "Completely satisfied", "Terrible customer service", "Fantastic quality", "Not as described", "Exceeded my expectations"
]
original_labels = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1])  # 1: Positive, 0: Negative

# Generate augmented dataset
augmented_texts, augmented_labels = augment_data(original_texts, original_labels, augment_factor=4)

# Preprocessing and feature extraction for augmented dataset
vocabulary = get_vocabulary(augmented_texts)
X_augmented = texts_to_bow(augmented_texts, vocabulary)
y_augmented = np.array(augmented_labels)

# Split dataset into training and testing
np.random.seed(42)  # For reproducibility
indices = np.random.permutation(len(augmented_texts))
train_indices, test_indices = indices[:int(len(indices)*0.8)], indices[int(len(indices)*0.8):]
X_train, X_test = X_augmented[train_indices], X_augmented[test_indices]
y_train, y_test = y_augmented[train_indices], y_augmented[test_indices]

# Train and evaluate the Naive Bayes classifier
classifier = NaiveBayesClassifier()
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)
accuracy = np.mean(predictions == y_test)

print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 90.00%


In [None]:
# print(X_test)
# print(predictions)

# Connect Four

In [None]:
# Import deepcopy from the copy module to copy the game board correctly.
from copy import deepcopy

# Define the ConnectFour class to encapsulate the game logic.
class ConnectFour:

    def __init__(self):
        # Initialize a 6x7 board with spaces representing empty cells.
        self.board = [[' ' for _ in range(7)] for _ in range(6)]
        # Set the current player to 'X'. Players alternate between 'X' and 'O'.
        self.current_player = 'X'

    # Print the current state of the board.
    def print_board(self):
        for row in self.board:
            print('|' + '|'.join(row) + '|')
        print('+---' * 7 + '+')

    # Attempt to make a move in the specified column.
    def make_move(self, column):
        # Start from the bottom row, moving upwards, to find the first empty cell.
        for row in reversed(range(6)):
            if self.board[row][column] == ' ':
                # Place the current player's token in the empty cell.
                self.board[row][column] = self.current_player
                return True
        # If the column is full, return False to indicate the move can't be made.
        return False

    # Switch the current player from 'X' to 'O', or 'O' to 'X'.
    def switch_player(self):
        self.current_player = 'O' if self.current_player == 'X' else 'X'

    # Check if a move can be made in the specified column (i.e., if the column is not full).
    def is_valid_move(self, column):
        return self.board[0][column] == ' '

    # Check for a win condition for the current player.
    def check_win(self):
        # Check all horizontal lines for a win.
        for c in range(4):
            for r in range(6):
                if self.board[r][c] == self.current_player and \
                   self.board[r][c + 1] == self.current_player and \
                   self.board[r][c + 2] == self.current_player and \
                   self.board[r][c + 3] == self.current_player:
                    return True

        # Check all vertical lines for a win.
        for c in range(7):
            for r in range(3):
                if self.board[r][c] == self.current_player and \
                   self.board[r + 1][c] == self.current_player and \
                   self.board[r + 2][c] == self.current_player and \
                   self.board[r + 3][c] == self.current_player:
                    return True

        # Check all positively sloped diagonals for a win.
        for c in range(4):
            for r in range(3, 6):
                if self.board[r][c] == self.current_player and \
                   self.board[r - 1][c + 1] == self.current_player and \
                   self.board[r - 2][c + 2] == self.current_player and \
                   self.board[r - 3][c + 3] == self.current_player:
                    return True

        # Check all negatively sloped diagonals for a win.
        for c in range(4):
            for r in range(3):
                if self.board[r][c] == self.current_player and \
                   self.board[r + 1][c + 1] == self.current_player and \
                   self.board[r + 2][c + 2] == self.current_player and \
                   self.board[r + 3][c + 3] == self.current_player:
                    return True

        # If no win condition is met, return False.
        return False

    # List all legal moves that can be made (i.e., columns that are not full).
    def legal_moves(self):
        return [c for c in range(7) if self.is_valid_move(c)]

    # Check if the game is a draw (i.e., the board is full and there's no winner).
    def is_draw(self):
        for c in range(7):
            if self.is_valid_move(c):
                return False
        return True

    # Evaluate a window of four cells for scoring in the minimax algorithm.
    ### REWARD SYSTEM ------------------------------------->

    def evaluate_window(self, window):
        '''
        Assesses a "window" of four consecutive slots on the board to determine its strategic value.
        A window can be a sequence of four slots in a row, column, or diagonal.
        The function calculates a score for the window based on the number of pieces belonging to the current player (self.current_player) and the opponent, as well as the number of empty slots (' ').

        Current Player's Advantage:

        Four in a Row: If the window contains four pieces of the current player (window.count(self.current_player) == 4), it's a winning condition, so a high score of +100 is awarded.
        This is the most favorable outcome for the current player.
        Three with an Empty Slot: A window with three pieces of the current player and one empty slot (window.count(self.current_player) == 3 and window.count(' ') == 1)
        indicates a potential win in the next move if the empty slot is filled by the current player. This scenario is awarded a score of +5, reflecting its high strategic value.
        Two with Two Empty Slots: A window with two pieces of the current player and two empty slots (window.count(self.current_player) == 2 and window.count(' ') == 2)
        is less immediately threatening but still offers potential for setting up future moves. This configuration scores +2, indicating a moderate strategic value.

        Opponent's Advantage - threat!
        Opponent's Three with an Empty Slot: If the opponent has three pieces in the window with one empty slot remaining (window.count(opp_player) == 3 and window.count(' ') == 1),
        this poses a significant threat since the opponent could win in their next turn by filling the empty slot. To reflect the need to address this threat, the function deducts a score of -4. This negative score is crucial for the AI to prioritize blocking the opponent's potential wins.

        '''
        score = 0
        if window.count(self.current_player) == 4:
            score += 100 # Four in a Row
        elif window.count(self.current_player) == 3 and window.count(' ') == 1:
            score += 5 # Three with an Empty Slot
        elif window.count(self.current_player) == 2 and window.count(' ') == 2:
            score += 2 # Two with Two Empty Slots

        opp_player = 'O' if self.current_player == 'X' else 'X'
        if window.count(opp_player) == 3 and window.count(' ') == 1:
            score -= 4 # Opponent's Three with an Empty Slot

        return score

    '''
    The scoring logic prioritizes positions that create opportunities for the AI to win (e.g., three in a row with an open fourth space)
    and blocks the opponent's opportunities. By evaluating the board in terms of center control, line potentials (horizontal, vertical, and diagonal),
    the AI can make informed decisions on where to place its next piece, aiming to maximize its chances of winning while minimizing the opponent's opportunities.
    '''

    def score_position(self):
        score = 0
        # Score center column
        '''
        The center column is often considered the most valuable in Connect Four because it allows for
        the most possibilities for creating four-in-a-row, both vertically, horizontally, and diagonally.
        This code creates an array representing all the cells in the center column (self.board[i][3] for each row i in the range 6,
        since the board has 6 rows and the center column index is 3 in a 0-indexed array).
        It then counts how many pieces the current player has in the center column (center_count)
        and adds to the score a value (center_count * 3) that reflects the strategic advantage of controlling the center column.

        '''
        center_array = [self.board[i][3] for i in range(6)]
        center_count = center_array.count(self.current_player)
        score += center_count * 3

        # Score Horizontal
        '''
        This section iterates through each row (r) and examines every horizontal "window" of four adjacent cells within that row.
        A "window" is a subset of the row that contains exactly four cells (row_array[c:c + 4]).
        The loop only needs to go up to index 4 in each row because starting any further right would not leave enough space for a window of four cells.
        Each window is evaluated using the evaluate_window method, which returns a score based on the contents of the
        window (how many pieces belong to the current player versus the opponent), and this score is added to the total score.

        '''
        for r in range(6):
            row_array = [str(i) for i in self.board[r]]
            for c in range(4):
                window = row_array[c:c + 4]
                score += self.evaluate_window(window)

        # Score Vertical
        '''
        Similar to the horizontal scoring, this part evaluates vertical windows. It iterates over each column (c), creating an array for that column.
        For each column, it looks at vertical windows of four cells, starting from the bottom (r goes from 0 to 2).
        The loop stops at 2 because starting from any higher row wouldn't leave enough space for a window of four cells vertically.
        Each vertical window is scored using evaluate_window, contributing to the total score.

        '''
        for c in range(7):
            col_array = [str(self.board[r][c]) for r in range(6)]
            for r in range(3):
                window = col_array[r:r + 4]
                score += self.evaluate_window(window)

        # Score positive sloped diagonal
        '''
        This part scores diagonals that slope upwards from left to right.
        It starts from the bottom three rows and the leftmost four columns to ensure there's enough space for a diagonal of four.
        For each starting position, it creates a diagonal window by increasing the column index (c + i) and
        decreasing the row index (r - i) as it moves through the loop, simulating a positive slope.
        Each positively sloped window is evaluated and contributes to the total score.
        '''
        for r in range(3, 6):
            for c in range(4):
                window = [self.board[r - i][c + i] for i in range(4)]
                score += self.evaluate_window(window)

        # Score negative sloped diagonal
        '''
        This section scores diagonals sloping downwards from left to right. It starts from the top three rows and leftmost four columns for the same reason as above.
        The diagonal window is created by increasing both the row and column indices (r + i and c + i), simulating a negative slope.
        Each negatively sloped window is evaluated, affecting the overall score.
        '''
        for r in range(3):
            for c in range(4):
                window = [self.board[r + i][c + i] for i in range(4)]
                score += self.evaluate_window(window)

        return score

    # The minimax algorithm with alpha-beta pruning.
    def minimax(self, depth, alpha, beta, maximizingPlayer):
        # List all legal moves.
        valid_locations = self.legal_moves()
        # Check for terminal node (win, lose, draw).
        is_terminal = self.check_win() or self.is_draw()
        if depth == 0 or is_terminal:
            # --TODO--
                return (None, self.score_position())
        if maximizingPlayer:
            value = -float('inf')
            column = valid_locations[0]
            for col in valid_locations:
                # Make a move in the column.
                #--TODO--
            return column, value
        else: # Minimizing player
            value = float('inf')
            column = valid_locations[0]
            for col in valid_locations:
                #--TODO--
            return column, value

    # Determine the best move by using the minimax algorithm.
    def best_move(self, depth):
        # Get the column and score of the best move.
        column, minimax_score = self.minimax(depth, -float('inf'), float('inf'), True)
        # Return the best column to move in.
        return column



In [None]:
# Initialize the game
game = ConnectFour()

# Simulate a game
game.make_move(3)  # Player X
game.switch_player()
game.make_move(4)  # Player O
game.switch_player()

# Print the current board
game.print_board()

# Find the best move for player X
best_column = game.best_move(depth=5)
print("best_column", best_column)

# Make the best move for player X and print the updated board
game.make_move(best_column)  # Player X makes the recommended move
game.switch_player()  # Switch to player O for the next turn

# Print the updated board
game.print_board()

| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | |X|O| | |
+---+---+---+---+---+---+---+
best_column 0
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
|X| | |X|O| | |
+---+---+---+---+---+---+---+


In [None]:
# Initialize the game.
game = ConnectFour()
game_over = False
turn = 0

# Main game loop.
while not game_over:
    # Player X move (AI).
    if turn == 0:
        column = game.best_move(5)
        if game.make_move(column):
            print(f"Player X places a piece in column {column}.")
            if game.check_win():
                print("Player X wins!")
                game_over = True
            turn = 1 # Switch turns
            game.switch_player() # Switch players
    # Player O move (AI or human).
    else:
        column = game.best_move(5)
        if game.make_move(column):
            print(f"Player O places a piece in column {column}.")
            if game.check_win():
                print("Player O wins!")
                game_over = True
            turn = 0 # Switch turns
            game.switch_player() # Switch players

    # Check for a draw.
    if game.is_draw():
        print("The game is a draw!")
        game_over = True

    # Print the current board state.
    game.print_board()


Player X places a piece in column 0.
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
|X| | | | | | |
+---+---+---+---+---+---+---+
Player O places a piece in column 0.
| | | | | | | |
| | | | | | | |
| | | | | | | |
| | | | | | | |
|O| | | | | | |
|X| | | | | | |
+---+---+---+---+---+---+---+
Player X places a piece in column 0.
| | | | | | | |
| | | | | | | |
| | | | | | | |
|X| | | | | | |
|O| | | | | | |
|X| | | | | | |
+---+---+---+---+---+---+---+
Player O places a piece in column 0.
| | | | | | | |
| | | | | | | |
|O| | | | | | |
|X| | | | | | |
|O| | | | | | |
|X| | | | | | |
+---+---+---+---+---+---+---+
Player X places a piece in column 0.
| | | | | | | |
|X| | | | | | |
|O| | | | | | |
|X| | | | | | |
|O| | | | | | |
|X| | | | | | |
+---+---+---+---+---+---+---+
Player O places a piece in column 0.
|O| | | | | | |
|X| | | | | | |
|O| | | | | | |
|X| | | | | | |
|O| | | | | | |
|X| | | | | | |
+---+---+---+---+---+---+---+
Player X places a piec