{
"title": "K-Nearest Neighbors (KNN) Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions focused entirely on K-Nearest Neighbors (KNN) — covering intuition, distance metrics, hyperparameter tuning, classification & regression behavior, curse of dimensionality, and real-world use cases.",
"questions": [
{
"id": 1,
"questionText": "What is the core principle behind the KNN algorithm?",
"options": [
"It builds a decision tree and splits data recursively.",
"It constructs a probabilistic model using Bayes theorem.",
"It predicts the label based on the majority class of k nearest data points.",
"It reduces dimensionality using PCA."
],
"correctAnswerIndex": 2,
"explanation": "KNN predicts the class based on voting from the k nearest neighbors in the training data."
},
{
"id": 2,
"questionText": "KNN is considered which type of learning algorithm?",
"options": [
"Eager learning",
"Reinforcement learning",
"Unsupervised learning",
"Lazy learning"
],
"correctAnswerIndex": 3,
"explanation": "KNN is a lazy learner because it does not build a model during training; it only stores the data."
},
{
"id": 3,
"questionText": "Which distance metric is most commonly used in KNN?",
"options": [
"Cosine Similarity",
"Manhattan Distance",
"Jaccard Distance",
"Euclidean Distance"
],
"correctAnswerIndex": 3,
"explanation": "Euclidean Distance (L2 norm) is the most common distance metric used in KNN."
},
{
"id": 4,
"questionText": "KNN is mainly used for:",
"options": [
"Only regression",
"Only clustering",
"Both classification and regression",
"Only classification"
],
"correctAnswerIndex": 2,
"explanation": "KNN can perform both classification (class votes) and regression (mean of k nearest values)."
},
{
"id": 5,
"questionText": "What happens if k is set to a very large value?",
"options": [
"The model becomes faster and more accurate always.",
"The model becomes overly generalized and biased.",
"The model becomes highly sensitive to noise.",
"The model becomes very overfitted."
],
"correctAnswerIndex": 1,
"explanation": "A very large k considers too many neighbors and may smooth out genuine class boundaries, causing high bias."
},
{
"id": 6,
"questionText": "In KNN, what does the parameter 'k' represent?",
"options": [
"Number of features in the dataset",
"Depth of the tree used internally",
"Number of nearest neighbors considered",
"Learning rate of the algorithm"
],
"correctAnswerIndex": 2,
"explanation": "'k' is the number of closest neighbors used to decide the predicted class or value."
},
{
"id": 7,
"questionText": "Which of the following is true about KNN training phase?",
"options": [
"It stores all training data without building a model",
"It builds a model by computing centroids",
"It generates decision boundaries explicitly",
"It calculates feature importance scores"
],
"correctAnswerIndex": 0,
"explanation": "KNN is a lazy learner; during training, it only stores the dataset for use at prediction time."
},
{
"id": 8,
"questionText": "Which KNN variant can handle weighted voting?",
"options": [
"Uniform KNN",
"Decision Tree",
"Random Forest",
"Weighted KNN"
],
"correctAnswerIndex": 3,
"explanation": "Weighted KNN gives closer neighbors higher influence while predicting the output."
},
{
"id": 9,
"questionText": "Which of the following affects KNN performance the most?",
"options": [
"Choice of distance metric",
"Activation function",
"Regularization parameter",
"Number of epochs"
],
"correctAnswerIndex": 0,
"explanation": "KNN relies on distance computations; the choice of distance metric (Euclidean, Manhattan, etc.) is critical."
},
{
"id": 10,
"questionText": "What is the default distance metric for most KNN implementations?",
"options": [
"Cosine similarity",
"Manhattan distance",
"Hamming distance",
"Euclidean distance"
],
"correctAnswerIndex": 3,
"explanation": "Euclidean distance is most commonly used by default in KNN implementations."
},
{
"id": 11,
"questionText": "How does KNN handle a new data point for prediction?",
"options": [
"It updates its model parameters",
"It finds k closest points in the training set and predicts based on them",
"It generates random prediction",
"It builds a regression line through neighbors"
],
"correctAnswerIndex": 1,
"explanation": "KNN predicts by looking at the nearest k training points and using majority vote (classification) or average (regression)."
},
{
"id": 12,
"questionText": "What is the main drawback of KNN on large datasets?",
"options": [
"Does not scale to many classes",
"High training time",
"Cannot handle missing values",
"High prediction time"
],
"correctAnswerIndex": 3,
"explanation": "KNN stores all training data, so prediction involves computing distances to all points, which is slow for large datasets."
},
{
"id": 13,
"questionText": "Which of the following is true about KNN and normalization?",
"options": [
"Normalization is not required",
"Normalization only applies to categorical data",
"Normalization changes class labels",
"Normalization improves distance-based predictions"
],
"correctAnswerIndex": 3,
"explanation": "Since KNN uses distances, features with larger scales can dominate. Normalization ensures fair contribution from all features."
},
{
"id": 14,
"questionText": "How does KNN behave in the presence of irrelevant features?",
"options": [
"Features are automatically ignored",
"Performance improves",
"Performance drops",
"Algorithm ignores them during prediction"
],
"correctAnswerIndex": 2,
"explanation": "Irrelevant features can distort distance calculations and reduce KNN prediction accuracy."
},
{
"id": 15,
"questionText": "What type of algorithm is KNN considered in terms of model structure?",
"options": [
"Non-parametric",
"Linear",
"Probabilistic",
"Parametric"
],
"correctAnswerIndex": 0,
"explanation": "KNN is non-parametric because it does not assume a predefined form for the function mapping inputs to outputs."
},
{
"id": 16,
"questionText": "Which K value is generally recommended to avoid overfitting in KNN?",
"options": [
"k moderate value like sqrt(n)",
"k = 1",
"k very small",
"k equal to dataset size"
],
"correctAnswerIndex": 0,
"explanation": "A moderate k like sqrt(n) balances bias and variance, preventing overfitting."
},
{
"id": 17,
"questionText": "Which metric is suitable for categorical variables in KNN?",
"options": [
"Minkowski distance",
"Manhattan distance",
"Euclidean distance",
"Hamming distance"
],
"correctAnswerIndex": 3,
"explanation": "Hamming distance counts mismatches between categorical feature values."
},
{
"id": 18,
"questionText": "Which of the following is NOT a type of KNN?",
"options": [
"Weighted KNN",
"Regression KNN",
"Decision KNN",
"Classification KNN"
],
"correctAnswerIndex": 2,
"explanation": "There is no 'Decision KNN'; KNN is mainly classification, regression, or weighted variant."
},
{
"id": 19,
"questionText": "What is the effect of having two classes with very imbalanced sizes in KNN?",
"options": [
"Minority class dominates predictions",
"Majority class dominates predictions",
"KNN automatically balances classes",
"Minor impact on accuracy"
],
"correctAnswerIndex": 1,
"explanation": "KNN predictions are influenced by majority neighbors; imbalanced classes may bias the results."
},
{
"id": 20,
"questionText": "What is the primary storage requirement for KNN?",
"options": [
"Feature coefficients",
"All training data points",
"Decision thresholds",
"Distance matrices precomputed"
],
"correctAnswerIndex": 1,
"explanation": "KNN requires storing all training data for distance comparisons at prediction time."
},
{
"id": 21,
"questionText": "What does the term 'curse of dimensionality' refer to in KNN?",
"options": [
"Overfitting in small datasets",
"High computation time with too many neighbors",
"Distances become less meaningful in high dimensions",
"Underfitting in large datasets"
],
"correctAnswerIndex": 2,
"explanation": "As dimensions increase, data points become sparse and distance measures lose effectiveness, reducing KNN performance."
},
{
"id": 22,
"questionText": "Which technique can speed up KNN on large datasets?",
"options": [
"KD-Trees or Ball-Trees",
"Using logistic regression instead",
"Principal Component Analysis",
"Random Forest preprocessing"
],
"correctAnswerIndex": 0,
"explanation": "KD-Trees and Ball-Trees organize data to quickly find nearest neighbors without computing all distances."
},
{
"id": 23,
"questionText": "In KNN regression, how is the predicted value calculated?",
"options": [
"Using linear regression on neighbors",
"Using gradient descent",
"Majority vote of nearest neighbors",
"Average of nearest neighbors’ values"
],
"correctAnswerIndex": 3,
"explanation": "KNN regression predicts by taking the mean (or sometimes weighted mean) of the k nearest neighbors' values."
},
{
"id": 24,
"questionText": "Which of the following is true about KNN decision boundary?",
"options": [
"Always axis-aligned",
"Always linear",
"Depends on data distribution",
"Always circular"
],
"correctAnswerIndex": 2,
"explanation": "KNN decision boundaries can be irregular and follow the shape of data; they are not restricted to linear forms."
},
{
"id": 25,
"questionText": "Which method can improve KNN on high-dimensional data?",
"options": [
"Increasing k to dataset size",
"Feature selection",
"Ignoring normalization",
"Adding more neighbors"
],
"correctAnswerIndex": 1,
"explanation": "Selecting relevant features reduces dimensionality, improving distance calculation reliability."
},
{
"id": 26,
"questionText": "KNN cannot handle which of the following natively?",
"options": [
"Large datasets efficiently",
"Numeric features",
"Categorical features",
"Missing data directly"
],
"correctAnswerIndex": 3,
"explanation": "KNN cannot handle missing values without preprocessing (imputation or removal)."
},
{
"id": 27,
"questionText": "How does KNN handle ties in classification voting?",
"options": [
"Chooses randomly among tied classes",
"Fails with an error",
"Chooses the closest neighbor's class",
"Always chooses class 0"
],
"correctAnswerIndex": 2,
"explanation": "Many implementations break ties by selecting the class of the closest neighbor among the tied classes."
},
{
"id": 28,
"questionText": "Which scenario would make KNN less suitable?",
"options": [
"Low-dimensional small datasets",
"High-dimensional large datasets",
"Well-separated clusters",
"Binary classification"
],
"correctAnswerIndex": 1,
"explanation": "In high-dimensional large datasets, KNN is slow and distances lose meaning, reducing accuracy."
},
{
"id": 29,
"questionText": "What is the time complexity of a naive KNN prediction with n training points?",
"options": [
"O(n^2)",
"O(1)",
"O(log n)",
"O(n)"
],
"correctAnswerIndex": 3,
"explanation": "Naive KNN computes distances to all n points for each prediction, giving O(n) complexity."
},
{
"id": 30,
"questionText": "What preprocessing step can improve KNN accuracy?",
"options": [
"Adding irrelevant features",
"Removing the dependent variable",
"Randomly shuffling the data",
"Scaling features to similar range"
],
"correctAnswerIndex": 3,
"explanation": "Scaling features ensures fair distance computation, preventing one feature from dominating due to larger numeric range."
},
{
"id": 31,
"questionText": "What is the effect of increasing 'k' in KNN classification?",
"options": [
"Decreases bias",
"Reduces overfitting",
"Increases sensitivity to noise",
"Increases model variance"
],
"correctAnswerIndex": 1,
"explanation": "A larger k smooths out predictions, reducing overfitting and variance but increasing bias."
},
{
"id": 32,
"questionText": "Which distance metric can be more robust to outliers in KNN?",
"options": [
"Cosine similarity",
"Minkowski distance",
"Manhattan distance",
"Euclidean distance"
],
"correctAnswerIndex": 2,
"explanation": "Manhattan distance is less sensitive to large deviations in individual features than Euclidean distance."
},
{
"id": 33,
"questionText": "How can KNN be modified for imbalanced datasets?",
"options": [
"Use weighted voting based on distance",
"Increase k to dataset size",
"Normalize features only",
"Remove minority class samples"
],
"correctAnswerIndex": 0,
"explanation": "Weighted voting gives closer neighbors more influence, reducing bias toward the majority class."
},
{
"id": 34,
"questionText": "Which method can reduce KNN prediction time for large datasets?",
"options": [
"Dimensionality reduction like PCA",
"Using random shuffling",
"Increasing k",
"Adding more features"
],
"correctAnswerIndex": 0,
"explanation": "Reducing the number of features with PCA lowers dimensionality, which speeds up distance computation."
},
{
"id": 35,
"questionText": "Why might KNN fail in very high-dimensional spaces?",
"options": [
"Overfitting to majority class",
"Random initialization",
"Learning rate too high",
"Curse of dimensionality"
],
"correctAnswerIndex": 3,
"explanation": "In high dimensions, points become equidistant and neighbors are less meaningful, reducing accuracy."
},
{
"id": 36,
"questionText": "What does weighted KNN regression use instead of simple averaging?",
"options": [
"Distance-based weighting of neighbors",
"Median value of neighbors",
"Majority vote of neighbors",
"Random selection of neighbors"
],
"correctAnswerIndex": 0,
"explanation": "Weighted KNN regression assigns higher weights to closer neighbors when computing the predicted value."
},
{
"id": 37,
"questionText": "Which technique is useful to handle categorical and numeric features together in KNN?",
"options": [
"Ignore numeric features",
"Convert categorical to numeric with one-hot encoding",
"Normalize categorical features only",
"Use majority voting only"
],
"correctAnswerIndex": 1,
"explanation": "One-hot encoding transforms categorical features to numeric so that distance metrics can be applied."
},
{
"id": 38,
"questionText": "In KNN, what is the effect of noisy features?",
"options": [
"Does not affect performance",
"Automatically removed",
"Reduces accuracy",
"Improves accuracy"
],
"correctAnswerIndex": 2,
"explanation": "Noisy features distort distance calculations, reducing prediction accuracy."
},
{
"id": 39,
"questionText": "Which of the following can help KNN generalize better?",
"options": [
"Adding more irrelevant features",
"Reducing k to 1",
"Feature scaling and selection",
"Increasing dataset size without preprocessing"
],
"correctAnswerIndex": 2,
"explanation": "Scaling ensures fair distance comparison, and selecting relevant features removes noise, improving generalization."
},
{
"id": 40,
"questionText": "What happens if k is even and there is a tie in classification?",
"options": [
"Prediction fails with error",
"Tie-breaking strategy is needed",
"Algorithm automatically increments k",
"Randomly ignores the new point"
],
"correctAnswerIndex": 1,
"explanation": "When k is even, ties may occur; most implementations have a tie-breaking rule like choosing the closest neighbor."
},
{
"id": 41,
"questionText": "Which preprocessing step can improve KNN on text data represented by TF-IDF vectors?",
"options": [
"L2 normalization",
"Random shuffling",
"Adding more terms",
"Stop-word removal only"
],
"correctAnswerIndex": 0,
"explanation": "L2 normalization ensures vectors are comparable in distance calculations for KNN."
},
{
"id": 42,
"questionText": "Which of the following affects KNN accuracy most in practice?",
"options": [
"Learning rate",
"Random seed only",
"Distance metric and k",
"Number of trees"
],
"correctAnswerIndex": 2,
"explanation": "Choice of k and distance metric strongly influence KNN performance."
},
{
"id": 43,
"questionText": "In KNN regression, how can you reduce the impact of outliers?",
"options": [
"Use simple mean without weighting",
"Increase k to dataset size",
"Ignore preprocessing",
"Use weighted averaging based on distance"
],
"correctAnswerIndex": 3,
"explanation": "Weighting closer neighbors more heavily reduces the effect of distant outliers."
},
{
"id": 44,
"questionText": "Which approach can make KNN faster on large datasets?",
"options": [
"Increase k to max",
"Add random noise to data",
"KD-Tree, Ball-Tree, or approximate nearest neighbor search",
"Use high-dimensional features"
],
"correctAnswerIndex": 2,
"explanation": "Tree-based or approximate search structures reduce distance computations needed for prediction."
},
{
"id": 45,
"questionText": "How does KNN handle multi-class classification?",
"options": [
"By majority vote among neighbors",
"Cannot handle multi-class",
"By training separate binary classifiers",
"Only predicts top two classes"
],
"correctAnswerIndex": 0,
"explanation": "KNN counts votes among k neighbors for all classes and selects the class with the highest votes."
},
{
"id": 46,
"questionText": "Which distance metric is suitable for high-dimensional sparse data?",
"options": [
"Manhattan distance",
"Euclidean distance",
"Cosine similarity",
"Hamming distance"
],
"correctAnswerIndex": 2,
"explanation": "Cosine similarity works better for high-dimensional sparse vectors like TF-IDF representations."
},
{
"id": 47,
"questionText": "What happens to KNN performance if features are not scaled?",
"options": [
"Dominated by features with larger scales",
"Performance improves automatically",
"Distance calculation is unaffected",
"Accuracy remains same always"
],
"correctAnswerIndex": 0,
"explanation": "Features with larger numeric ranges dominate distance computation, skewing predictions."
},
{
"id": 48,
"questionText": "How can KNN be adapted for regression with categorical features?",
"options": [
"Encode categories numerically or use mixed distance metric",
"Use Euclidean distance directly",
"Remove categorical features",
"Only predict the most frequent category"
],
"correctAnswerIndex": 0,
"explanation": "Encoding categorical features allows KNN to compute distances effectively for regression tasks."
},
{
"id": 49,
"questionText": "What is one common method to select an optimal k?",
"options": [
"Maximizing feature count",
"Using k=1 always",
"Random selection",
"Cross-validation"
],
"correctAnswerIndex": 3,
"explanation": "Cross-validation evaluates different k values to choose the one yielding best performance."
},
{
"id": 50,
"questionText": "Which factor can lead to overfitting in KNN?",
"options": [
"Using fewer neighbors",
"Too small k value",
"Scaling features",
"Using weighted distance"
],
"correctAnswerIndex": 1,
"explanation": "A very small k (like k=1) can fit to noise and outliers, causing overfitting."
},
{
"id": 51,
"questionText": "In KNN, what is an advantage of using odd k values in binary classification?",
"options": [
"Avoid ties in voting",
"Reduce distance calculations",
"Increase speed",
"Improve scaling automatically"
],
"correctAnswerIndex": 0,
"explanation": "Odd k values help prevent ties between two classes."
},
{
"id": 52,
"questionText": "Which type of feature transformation is recommended for KNN?",
"options": [
"Adding irrelevant features",
"One-hot encoding only for numeric data",
"Normalization or standardization",
"Random shuffling of features"
],
"correctAnswerIndex": 2,
"explanation": "Normalization ensures fair contribution of each feature to distance calculation."
},
{
"id": 53,
"questionText": "Which of the following reduces KNN sensitivity to outliers?",
"options": [
"Increase k to 1",
"Use Euclidean distance only",
"Remove normalization",
"Weighted distance averaging"
],
"correctAnswerIndex": 3,
"explanation": "Weighting neighbors based on distance gives closer points more influence, reducing outlier impact."
},
{
"id": 54,
"questionText": "In KNN, what is the effect of adding irrelevant features?",
"options": [
"Automatically removed",
"Decreases accuracy",
"Increases accuracy",
"No effect"
],
"correctAnswerIndex": 1,
"explanation": "Irrelevant features distort distance calculations, reducing prediction accuracy."
},
{
"id": 55,
"questionText": "Which method can improve KNN performance in sparse datasets?",
"options": [
"Ignore distance weighting",
"Add noise to features",
"Dimensionality reduction",
"Increase k to dataset size"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensionality can make distance computations more meaningful in sparse datasets."
},
{
"id": 56,
"questionText": "Which approach helps handle large-scale KNN efficiently?",
"options": [
"Increasing k arbitrarily",
"Scaling only",
"Approximate nearest neighbor search",
"Random shuffling"
],
"correctAnswerIndex": 2,
"explanation": "Approximate nearest neighbor search reduces computational cost while giving nearly correct neighbors."
},
{
"id": 57,
"questionText": "Which of the following is true for KNN regression prediction?",
"options": [
"Weighted average based on neighbor distance",
"Average of nearest neighbors’ values",
"None of the above",
"Both A and B"
],
"correctAnswerIndex": 3,
"explanation": "KNN regression can use simple or weighted averaging of neighbors’ values."
},
{
"id": 58,
"questionText": "Which is a practical drawback of KNN in real-world systems?",
"options": [
"Requires model training",
"High prediction latency",
"Automatically ignores irrelevant features",
"Cannot handle numeric data"
],
"correctAnswerIndex": 1,
"explanation": "KNN computes distances at prediction time, leading to high latency for large datasets."
},
{
"id": 59,
"questionText": "Which type of scaling preserves relative distances between points for KNN?",
"options": [
"Min-Max scaling",
"Log transformation only",
"Adding random noise",
"Shuffling features"
],
"correctAnswerIndex": 0,
"explanation": "Min-Max or standardization scales features to similar ranges while preserving relative distances."
},
{
"id": 60,
"questionText": "Which is a disadvantage of KNN compared to parametric models?",
"options": [
"Requires fixed training",
"Slower predictions for large datasets",
"Cannot model non-linear boundaries",
"Sensitive to overfitting only"
],
"correctAnswerIndex": 1,
"explanation": "KNN stores all training data and computes distances, making predictions slower than parametric models."
},
{
"id": 61,
"questionText": "How can KNN handle multi-label classification?",
"options": [
"Uses separate KNN per label",
"Cannot handle multi-label",
"Predict all labels present in neighbors",
"Only predicts one label"
],
"correctAnswerIndex": 2,
"explanation": "KNN can aggregate labels from neighbors and predict multiple labels per instance."
},
{
"id": 62,
"questionText": "Which distance metric can handle mixed numeric and categorical data?",
"options": [
"Gower distance",
"Euclidean distance",
"Cosine similarity",
"Manhattan distance"
],
"correctAnswerIndex": 0,
"explanation": "Gower distance can compute similarity for mixed numeric and categorical features."
},
{
"id": 63,
"questionText": "What is one way to reduce memory usage in KNN for large datasets?",
"options": [
"Use condensed nearest neighbor algorithms",
"Ignore irrelevant features",
"Increase k to dataset size",
"Normalize only"
],
"correctAnswerIndex": 0,
"explanation": "Condensed nearest neighbor algorithms reduce stored points while maintaining accuracy."
},
{
"id": 64,
"questionText": "Which approach helps improve KNN in imbalanced datasets?",
"options": [
"Increase irrelevant features",
"Use k=1 always",
"Distance-weighted voting",
"Ignore normalization"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting gives closer points more influence, reducing bias toward majority class."
},
{
"id": 65,
"questionText": "What is the effect of increasing feature dimensionality in KNN?",
"options": [
"Computation decreases",
"Feature importance is automatically computed",
"Accuracy always improves",
"Distances become less meaningful"
],
"correctAnswerIndex": 3,
"explanation": "High-dimensional spaces make points almost equidistant, reducing KNN effectiveness."
},
{
"id": 66,
"questionText": "Which scenario can cause KNN to misclassify a data point?",
"options": [
"Choosing odd k",
"Using weighted voting",
"Nearby points from other class dominate",
"Normalization applied"
],
"correctAnswerIndex": 2,
"explanation": "If neighbors are closer from other classes, KNN may predict incorrectly."
},
{
"id": 67,
"questionText": "Which strategy can improve KNN with very sparse datasets?",
"options": [
"Add random features",
"Ignore distance metric",
"Dimensionality reduction",
"Increase k arbitrarily"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensionality reduces sparsity and makes distances meaningful."
},
{
"id": 68,
"questionText": "What is a good rule of thumb for selecting k?",
"options": [
"k = 1 always",
"k = n/2",
"k = sqrt(n)",
"k = number of features"
],
"correctAnswerIndex": 2,
"explanation": "Using k = sqrt(n) balances bias and variance in most cases."
},
{
"id": 69,
"questionText": "Which technique can speed up KNN predictions in high dimensions?",
"options": [
"Approximate nearest neighbor algorithms",
"Normalize only",
"Random shuffling",
"Increase k to max"
],
"correctAnswerIndex": 0,
"explanation": "Approximate nearest neighbor search reduces computation while maintaining accuracy."
},
{
"id": 70,
"questionText": "Which type of data preprocessing improves KNN performance?",
"options": [
"Random shuffling only",
"Ignoring categorical features",
"Adding irrelevant features",
"Feature scaling and selection"
],
"correctAnswerIndex": 3,
"explanation": "Scaling ensures fair distance measurement, and selecting relevant features removes noise, improving predictions."
},
{
"id": 71,
"questionText": "In a recommendation system using KNN, what could cause poor predictions?",
"options": [
"High number of neighbors",
"Sparse user-item interaction data",
"Low-dimensional features",
"Normalized data"
],
"correctAnswerIndex": 1,
"explanation": "Sparse interaction matrices reduce neighbor similarity reliability, causing poor recommendations."
},
{
"id": 72,
"questionText": "Which approach is suitable for reducing KNN latency in a real-time system?",
"options": [
"Randomly select features",
"Increase k to dataset size",
"Normalize data only",
"Approximate nearest neighbor search"
],
"correctAnswerIndex": 3,
"explanation": "Approximate nearest neighbor algorithms provide fast predictions with minimal accuracy loss."
},
{
"id": 73,
"questionText": "In high-dimensional gene expression data, KNN performance drops because:",
"options": [
"Normalization causes data loss",
"KNN overfits easily with large k",
"Distances become less informative (curse of dimensionality)",
"Minority classes dominate"
],
"correctAnswerIndex": 2,
"explanation": "High-dimensional data makes points nearly equidistant, reducing neighbor relevance and accuracy."
},
{
"id": 74,
"questionText": "Scenario: A new customer profile is very different from existing customers. Which issue might KNN face?",
"options": [
"Predicted class may be inaccurate due to no similar neighbors",
"KNN will automatically ignore the profile",
"Model overfits automatically",
"KNN will generate a new class"
],
"correctAnswerIndex": 0,
"explanation": "If no close neighbors exist, KNN cannot provide reliable predictions."
},
{
"id": 75,
"questionText": "What is the main challenge of KNN when deployed in high-frequency trading?",
"options": [
"Weighted voting is not supported",
"Overfitting to training set",
"Distance metric fails for numeric data",
"High prediction latency due to large datasets"
],
"correctAnswerIndex": 3,
"explanation": "KNN requires computing distances to all stored points, making it too slow for real-time predictions in trading."
},
{
"id": 76,
"questionText": "Scenario: Two classes are very close in feature space but overlapping. Which KNN behavior is expected?",
"options": [
"KNN ignores overlapping points",
"Higher misclassification rate",
"KNN increases k automatically",
"Predictions are perfect"
],
"correctAnswerIndex": 1,
"explanation": "KNN struggles with overlapping classes as neighbors from the wrong class may dominate."
},
{
"id": 77,
"questionText": "Which method improves KNN performance for very high-dimensional image embeddings?",
"options": [
"Use raw pixel values directly",
"Dimensionality reduction (PCA, t-SNE, or UMAP)",
"Increase k to max",
"Randomly shuffle features"
],
"correctAnswerIndex": 1,
"explanation": "Reducing dimensions retains essential information and makes distances meaningful."
},
{
"id": 78,
"questionText": "Scenario: A fraud detection system uses KNN. New types of fraud appear. What is the limitation?",
"options": [
"KNN cannot detect unseen patterns without similar neighbors",
"Prediction latency decreases",
"KNN automatically adapts",
"Accuracy improves with noise"
],
"correctAnswerIndex": 0,
"explanation": "KNN relies on similarity to existing points, so unseen patterns are difficult to detect."
},
{
"id": 79,
"questionText": "What is a limitation of KNN in large-scale recommendation systems?",
"options": [
"Cannot handle numeric data",
"Fails on binary features",
"Overfits automatically",
"Memory and computation intensive"
],
"correctAnswerIndex": 3,
"explanation": "Storing all user-item interactions and computing distances is memory and CPU intensive."
},
{
"id": 80,
"questionText": "Which approach is suitable for speeding up KNN with millions of samples?",
"options": [
"Use weighted voting only",
"Increase k to n",
"Use approximate nearest neighbor libraries like FAISS or Annoy",
"Normalize features only"
],
"correctAnswerIndex": 2,
"explanation": "Approximate search libraries significantly reduce computation while maintaining near-optimal accuracy."
},
{
"id": 81,
"questionText": "Scenario: In KNN, a feature has a huge numeric range. What problem arises?",
"options": [
"Feature dominates distance, biasing prediction",
"Weighted voting fails",
"Prediction latency reduces",
"Feature is ignored automatically"
],
"correctAnswerIndex": 0,
"explanation": "Large-scale features dominate distance computation, skewing predictions unless scaled."
},
{
"id": 82,
"questionText": "What is a strategy to handle missing values in KNN?",
"options": [
"Impute missing values before computing distances",
"Increase k to handle missing",
"Ignore missing values automatically",
"Remove all neighbors with missing values"
],
"correctAnswerIndex": 0,
"explanation": "Missing values should be imputed (mean, median, or mode) to allow proper distance computation."
},
{
"id": 83,
"questionText": "Scenario: In medical diagnosis using KNN, rare disease cases are underrepresented. Which is a solution?",
"options": [
"Use raw unscaled features",
"Ignore minority class",
"Weighted voting or synthetic oversampling (SMOTE)",
"Reduce k to 1"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting or synthetic oversampling addresses imbalance and improves prediction of rare cases."
},
{
"id": 84,
"questionText": "Which technique reduces distance computation in high-dimensional KNN?",
"options": [
"Random shuffling",
"Adding irrelevant features",
"Dimensionality reduction",
"Increasing k to n"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensions reduces number of calculations and improves neighbor relevance."
},
{
"id": 85,
"questionText": "Scenario: In KNN regression, a few extreme neighbor values exist. What is the impact?",
"options": [
"Prediction unaffected",
"Predicted value may be skewed unless weighted",
"Accuracy improves automatically",
"KNN fails completely"
],
"correctAnswerIndex": 1,
"explanation": "Outliers can bias the predicted mean; using weighted averaging mitigates this effect."
},
{
"id": 86,
"questionText": "What is a benefit of KD-Tree in KNN?",
"options": [
"Reduces neighbor search complexity in low dimensions",
"Reduces bias of model",
"Automatically scales features",
"Increases training time significantly"
],
"correctAnswerIndex": 0,
"explanation": "KD-Tree allows efficient nearest neighbor search in low to moderate dimensions."
},
{
"id": 87,
"questionText": "Scenario: KNN is applied on time-series data without preprocessing. What is a potential problem?",
"options": [
"Outliers are automatically removed",
"Accuracy automatically improves",
"Distance metric ignores temporal order",
"KNN predicts trends perfectly"
],
"correctAnswerIndex": 2,
"explanation": "KNN does not account for temporal order; raw time-series may not capture pattern similarity properly."
},
{
"id": 88,
"questionText": "Which scenario illustrates KNN's limitation?",
"options": [
"Balanced, low-dimensional data",
"Normalized dataset",
"A new point far from all existing points",
"Few noise-free features"
],
"correctAnswerIndex": 2,
"explanation": "When a point is far from all neighbors, KNN cannot predict reliably."
},
{
"id": 89,
"questionText": "Scenario: KNN used for text document classification with TF-IDF vectors. Which step is crucial?",
"options": [
"Increase k to dataset size",
"Adding irrelevant terms",
"Ignore vector scaling",
"L2 normalization to make distances comparable"
],
"correctAnswerIndex": 3,
"explanation": "TF-IDF vectors should be normalized to ensure fair distance computation."
},
{
"id": 90,
"questionText": "Scenario: KNN struggles with overlapping clusters in feature space. What is a solution?",
"options": [
"Use feature engineering to separate clusters",
"Ignore scaling",
"Increase k arbitrarily",
"Remove minority points"
],
"correctAnswerIndex": 0,
"explanation": "Engineering features that better separate classes improves KNN accuracy."
},
{
"id": 91,
"questionText": "Which approach can improve KNN in very large datasets without losing much accuracy?",
"options": [
"Use approximate nearest neighbor search",
"Increase k to dataset size",
"Ignore preprocessing",
"Add random noise"
],
"correctAnswerIndex": 0,
"explanation": "Approximate search reduces computation while keeping predictions close to exact KNN."
},
{
"id": 92,
"questionText": "Scenario: Online KNN requires predictions every second. Challenge?",
"options": [
"Cannot handle numeric data",
"KNN scales features automatically",
"High latency due to full distance computation",
"Overfitting automatically"
],
"correctAnswerIndex": 2,
"explanation": "Real-time prediction is slow because KNN computes distance to all points at query time."
},
{
"id": 93,
"questionText": "Scenario: Multi-class KNN with imbalanced classes. What can improve fairness?",
"options": [
"Use k=1 always",
"Random shuffling",
"Distance-weighted voting",
"Ignore minority classes"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting ensures closer neighbors have more influence, improving minority class predictions."
},
{
"id": 94,
"questionText": "Scenario: A KNN model is deployed for anomaly detection. Limitation?",
"options": [
"Rare anomalies may have no close neighbors",
"Weighted KNN solves all issues",
"Accuracy improves automatically",
"Feature scaling is irrelevant"
],
"correctAnswerIndex": 0,
"explanation": "If anomalies are isolated, KNN cannot detect them due to lack of nearby points."
},
{
"id": 95,
"questionText": "Scenario: In high-dimensional image retrieval, KNN prediction is slow. Solution?",
"options": [
"Use raw pixel vectors",
"Use approximate nearest neighbor algorithms like FAISS",
"Increase k arbitrarily",
"Ignore normalization"
],
"correctAnswerIndex": 1,
"explanation": "Approximate algorithms reduce computation significantly while maintaining retrieval quality."
},
{
"id": 96,
"questionText": "Which scenario can lead to KNN overfitting?",
"options": [
"Very small k and noisy data",
"Large k with clean data",
"Normalized features",
"Weighted voting"
],
"correctAnswerIndex": 0,
"explanation": "Small k may fit noise and outliers, causing overfitting."
},
{
"id": 97,
"questionText": "Scenario: KNN regression for house prices with outlier houses. Best approach?",
"options": [
"Increase k arbitrarily",
"Remove scaling",
"Simple mean ignoring distances",
"Weighted averaging by distance"
],
"correctAnswerIndex": 3,
"explanation": "Weighted averaging reduces outlier impact, giving closer neighbors more influence."
},
{
"id": 98,
"questionText": "Scenario: KNN applied to large sparse matrix of user ratings. Challenge?",
"options": [
"Distance metric fails",
"High memory usage and computation",
"Overfitting automatically",
"Minority class ignored"
],
"correctAnswerIndex": 1,
"explanation": "Sparse matrices require storing many zeros and computing many distances, which is expensive."
},
{
"id": 99,
"questionText": "Scenario: Real-time KNN requires prediction in milliseconds. Solution?",
"options": [
"Use weighted voting only",
"Use approximate nearest neighbor search",
"Increase k to n",
"Ignore feature scaling"
],
"correctAnswerIndex": 1,
"explanation": "Approximate methods like Annoy or FAISS significantly speed up prediction for large datasets."
},
{
"id": 100,
"questionText": "Scenario: High-dimensional text KNN classification. Which step is crucial?",
"options": [
"Ignore scaling",
"Dimensionality reduction or normalization",
"Add random features",
"Use raw text vectors"
],
"correctAnswerIndex": 1,
"explanation": "High-dimensional text vectors suffer from the curse of dimensionality; normalization (like L2) or dimensionality reduction is needed to make distances meaningful."
}
]
}