{
"title": "Principal Component Analysis (PCA) Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Principal Component Analysis (PCA), from basic concepts to advanced applications in dimensionality reduction and feature engineering.",
"questions": [
{
"id": 1,
"questionText": "What is the main goal of Principal Component Analysis (PCA)?",
"options": [
"To classify data into categories",
"To generate random features",
"To cluster data points",
"To reduce the dimensionality of a dataset while retaining most variance"
],
"correctAnswerIndex": 3,
"explanation": "PCA aims to reduce the number of variables in a dataset while preserving as much variability as possible."
},
{
"id": 2,
"questionText": "In PCA, what does a 'principal component' represent?",
"options": [
"The cluster center",
"An original feature in the dataset",
"A new uncorrelated feature that captures maximum variance",
"The mean of all features"
],
"correctAnswerIndex": 2,
"explanation": "Principal components are linear combinations of original features that are uncorrelated and ordered by the amount of variance they capture."
},
{
"id": 3,
"questionText": "What is the first step before applying PCA?",
"options": [
"Standardize or normalize the data",
"Train a classifier",
"Apply k-means clustering",
"Remove outliers only"
],
"correctAnswerIndex": 0,
"explanation": "Standardization ensures that features with different scales contribute equally to the principal components."
},
{
"id": 4,
"questionText": "Scenario: You have 10 features, but 90% of the variance is captured in 2 components. What can you do?",
"options": [
"Keep all 10 features",
"Apply dropout",
"Add more features",
"Reduce the dataset to 2 principal components"
],
"correctAnswerIndex": 3,
"explanation": "Reducing to 2 principal components retains most of the information while simplifying the dataset."
},
{
"id": 5,
"questionText": "Which mathematical technique is commonly used to compute PCA?",
"options": [
"Eigen decomposition of the covariance matrix",
"Linear regression",
"Gradient descent",
"k-Nearest Neighbors"
],
"correctAnswerIndex": 0,
"explanation": "PCA typically involves computing eigenvectors and eigenvalues of the covariance matrix to find principal components."
},
{
"id": 6,
"questionText": "What property do all principal components have?",
"options": [
"They are uncorrelated (orthogonal) to each other",
"They sum to zero",
"They are dependent on each other",
"They always have the same variance"
],
"correctAnswerIndex": 0,
"explanation": "Principal components are constructed to be orthogonal, ensuring no redundancy in the information they capture."
},
{
"id": 7,
"questionText": "Scenario: PCA applied on a dataset with features in different scales. What happens if you don’t standardize?",
"options": [
"The first component captures zero variance",
"PCA fails to compute",
"Features with larger scale dominate the principal components",
"Variance is automatically normalized"
],
"correctAnswerIndex": 2,
"explanation": "Without standardization, features with larger numeric ranges contribute more to variance, skewing PCA results."
},
{
"id": 8,
"questionText": "Which of the following can PCA NOT do?",
"options": [
"Remove correlated features",
"Reduce dimensionality",
"Improve classification accuracy directly",
"Visualize high-dimensional data"
],
"correctAnswerIndex": 2,
"explanation": "PCA is unsupervised and reduces dimensionality; it does not directly improve classification accuracy."
},
{
"id": 9,
"questionText": "Scenario: You apply PCA and find that the first principal component explains 70% variance, the second 20%, and the rest 10%. How many components would you keep to retain 90% variance?",
"options": [
"All components",
"One component",
"Two components",
"Three components"
],
"correctAnswerIndex": 2,
"explanation": "Adding the first two components (70% + 20%) captures 90% of the dataset variance."
},
{
"id": 10,
"questionText": "PCA is an example of which type of learning?",
"options": [
"Supervised learning",
"Unsupervised learning",
"Reinforcement learning",
"Semi-supervised learning"
],
"correctAnswerIndex": 1,
"explanation": "PCA does not use labels; it finds patterns based on feature correlations, making it unsupervised."
},
{
"id": 11,
"questionText": "Scenario: You perform PCA and transform your 5D data to 3D. What property is guaranteed?",
"options": [
"Classification accuracy improves",
"The 3 components capture the maximum possible variance in 3D",
"All data points remain equidistant",
"The original features are perfectly preserved"
],
"correctAnswerIndex": 1,
"explanation": "PCA selects components capturing maximum variance in the reduced dimensions, not necessarily preserving original distances."
},
{
"id": 12,
"questionText": "Which PCA component explains the least variance?",
"options": [
"All components explain equal variance",
"Any intermediate component",
"The last principal component",
"The first principal component"
],
"correctAnswerIndex": 2,
"explanation": "PCA orders components from highest to lowest variance."
},
{
"id": 13,
"questionText": "Scenario: Dataset is already perfectly uncorrelated. What effect does PCA have?",
"options": [
"PCA fails to compute",
"Principal components are the same as original features",
"PCA increases correlation",
"PCA reduces variance"
],
"correctAnswerIndex": 1,
"explanation": "If features are uncorrelated, PCA simply aligns components with original features without reducing dimensionality."
},
{
"id": 14,
"questionText": "Scenario: You want to visualize high-dimensional data in 2D. PCA is applied. What is the risk?",
"options": [
"Data labels change",
"All variance is retained",
"Some variance is lost",
"Original features are unchanged"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensions to 2D inevitably loses some information (variance)."
},
{
"id": 15,
"questionText": "Which of the following matrices is symmetric and used in PCA?",
"options": [
"Distance matrix",
"Covariance matrix",
"Adjacency matrix",
"Correlation matrix"
],
"correctAnswerIndex": 1,
"explanation": "The covariance matrix is symmetric and serves as the basis for eigen decomposition in PCA."
},
{
"id": 16,
"questionText": "Scenario: You have highly correlated features. PCA is applied. Effect?",
"options": [
"Normalizes variance",
"Reduces redundancy by combining correlated features into fewer components",
"Removes labels",
"Increases correlation"
],
"correctAnswerIndex": 1,
"explanation": "PCA transforms correlated features into uncorrelated principal components, reducing redundancy."
},
{
"id": 17,
"questionText": "Scenario: You want to reduce noise in data. PCA helps by:",
"options": [
"Scaling the first component only",
"Adding more features",
"Increasing learning rate",
"Discarding components with low variance"
],
"correctAnswerIndex": 3,
"explanation": "Low-variance components often represent noise; removing them denoises the dataset."
},
{
"id": 18,
"questionText": "Scenario: Eigenvalues of a PCA covariance matrix are [4, 2, 1]. Which component explains the most variance?",
"options": [
"Second component",
"All equally",
"First component",
"Third component"
],
"correctAnswerIndex": 2,
"explanation": "Eigenvalues correspond to the variance captured; the largest eigenvalue indicates the component with most variance."
},
{
"id": 19,
"questionText": "Scenario: You apply PCA to a dataset and plot explained variance ratio. Purpose?",
"options": [
"Scale features",
"Compute correlation matrix",
"Determine how many components to keep",
"Train a classifier"
],
"correctAnswerIndex": 2,
"explanation": "Explained variance ratio helps decide how many components capture sufficient information."
},
{
"id": 20,
"questionText": "Which PCA component is always orthogonal to the first component?",
"options": [
"It may not be orthogonal",
"Third component",
"Second component",
"Last component only"
],
"correctAnswerIndex": 2,
"explanation": "By definition, each principal component is orthogonal to all previous components."
},
{
"id": 21,
"questionText": "Scenario: After PCA, some components have very small variance. Action?",
"options": [
"They must be retained",
"Scale them up",
"Add noise to them",
"They can be discarded"
],
"correctAnswerIndex": 3,
"explanation": "Components with negligible variance contribute little to data representation and can be removed."
},
{
"id": 22,
"questionText": "Scenario: PCA applied to non-linear data. Limitation?",
"options": [
"PCA generates labels",
"PCA cannot capture non-linear relationships",
"PCA increases variance",
"PCA overfits"
],
"correctAnswerIndex": 1,
"explanation": "Standard PCA is linear and cannot model complex non-linear structures; kernel PCA may be used instead."
},
{
"id": 23,
"questionText": "Scenario: PCA reduces features from 5D to 2D. Data reconstruction is approximate. Why?",
"options": [
"Labels change",
"Information is lost in discarded components",
"PCA adds noise",
"Variance is increased"
],
"correctAnswerIndex": 1,
"explanation": "Dimensionality reduction retains only top components, losing some original information."
},
{
"id": 24,
"questionText": "Which metric is used to measure how much variance is captured by selected components?",
"options": [
"Mean squared error",
"Correlation coefficient",
"Explained variance ratio",
"Euclidean distance"
],
"correctAnswerIndex": 2,
"explanation": "Explained variance ratio shows the proportion of total variance captured by each principal component."
},
{
"id": 25,
"questionText": "Scenario: PCA on standardized data vs. unstandardized data. Difference?",
"options": [
"Standardization reduces variance",
"Standardized data gives equal weight to all features",
"Unstandardized data improves variance capture",
"No difference"
],
"correctAnswerIndex": 1,
"explanation": "Standardization prevents features with large scales from dominating the PCA components."
},
{
"id": 26,
"questionText": "Scenario: Two features are perfectly correlated. PCA effect?",
"options": [
"Cannot perform PCA",
"One component captures the shared variance",
"Both components are kept equally",
"Variance becomes zero"
],
"correctAnswerIndex": 1,
"explanation": "PCA combines correlated features into a single principal component."
},
{
"id": 27,
"questionText": "Scenario: You want to visualize 3D data in 2D. PCA helps by:",
"options": [
"Adding more dimensions",
"Projecting onto top 2 principal components",
"Scaling features only",
"Generating new labels"
],
"correctAnswerIndex": 1,
"explanation": "Projection onto top principal components preserves as much variance as possible in reduced dimensions."
},
{
"id": 28,
"questionText": "Scenario: After PCA, you notice negative values in transformed features. Meaning?",
"options": [
"PCA failed",
"Original data must be negative",
"Data must be scaled again",
"Principal components can have negative and positive values"
],
"correctAnswerIndex": 3,
"explanation": "PCA components are linear combinations of original features, allowing both negative and positive values."
},
{
"id": 29,
"questionText": "Scenario: You apply PCA for feature selection. Best approach?",
"options": [
"Discard largest components",
"Select random components",
"Keep all features",
"Select top components explaining desired variance"
],
"correctAnswerIndex": 3,
"explanation": "Top principal components capture most variance and are most informative for feature selection."
},
{
"id": 30,
"questionText": "Scenario: PCA is applied to a dataset with 100 features. First 10 components explain 95% variance. Next step?",
"options": [
"Use all 100 features",
"Discard the 10 components",
"Add more features",
"Use 10 components for reduced dataset"
],
"correctAnswerIndex": 3,
"explanation": "Using the first 10 components retains 95% of information while reducing dimensionality significantly."
},
{
"id": 31,
"questionText": "Scenario: You apply PCA but some features dominate due to large variance. Solution?",
"options": [
"Standardize the features",
"Remove features with high variance",
"Apply k-means clustering",
"Reduce dataset size"
],
"correctAnswerIndex": 0,
"explanation": "Standardizing ensures all features contribute equally, preventing dominance of large-scale features."
},
{
"id": 32,
"questionText": "Scenario: PCA eigenvalues are [5, 2, 0.5, 0.1]. What does the smallest eigenvalue indicate?",
"options": [
"Largest variance",
"Component is most important",
"Least variance along that component",
"PCA failed"
],
"correctAnswerIndex": 2,
"explanation": "Eigenvalues represent variance along principal components; the smallest captures minimal variance."
},
{
"id": 33,
"questionText": "Scenario: Two components have similar eigenvalues. What to do?",
"options": [
"Discard one randomly",
"Keep both as they explain similar variance",
"Combine them manually",
"Always choose the first"
],
"correctAnswerIndex": 1,
"explanation": "Similar eigenvalues indicate both components carry significant information; both should be retained."
},
{
"id": 34,
"questionText": "Scenario: You want to analyze which original features contribute to a principal component. Technique?",
"options": [
"Check explained variance ratio only",
"Remove low variance features",
"Examine component loadings (eigenvectors)",
"Normalize the dataset"
],
"correctAnswerIndex": 2,
"explanation": "Loadings show the weight of each original feature in a principal component, indicating contribution."
},
{
"id": 35,
"questionText": "Scenario: PCA applied on correlation matrix vs covariance matrix. Difference?",
"options": [
"Correlation matrix standardizes features; covariance matrix uses original scale",
"No difference",
"Covariance matrix reduces variance",
"Correlation matrix increases variance"
],
"correctAnswerIndex": 0,
"explanation": "Correlation matrix accounts for differing scales by standardizing variables before computing PCA."
},
{
"id": 36,
"questionText": "Scenario: PCA is applied to noisy data. Effect of noise?",
"options": [
"Noise appears in low-variance components",
"Noise improves variance",
"Noise is amplified in all components",
"Noise disappears automatically"
],
"correctAnswerIndex": 0,
"explanation": "High-frequency noise often contributes little variance and is captured in later components, which can be discarded."
},
{
"id": 37,
"questionText": "Scenario: You want to visualize 4D data in 2D using PCA. Which components to use?",
"options": [
"Last 2 components",
"All 4 components",
"Top 2 principal components",
"Random 2 features"
],
"correctAnswerIndex": 2,
"explanation": "Top components retain most variance, providing the best 2D representation of high-dimensional data."
},
{
"id": 38,
"questionText": "Scenario: PCA applied on dataset with zero mean. Why mean-centering?",
"options": [
"Normalizes labels",
"Increases variance artificially",
"Reduces number of features",
"Ensures first component captures maximum variance from origin"
],
"correctAnswerIndex": 3,
"explanation": "Mean-centering removes bias and ensures principal components represent variance relative to the mean."
},
{
"id": 39,
"questionText": "Scenario: PCA applied but first component explains only 20% variance. Interpretation?",
"options": [
"First component is irrelevant",
"Data variance is spread across many components",
"Reduce dataset size",
"PCA failed"
],
"correctAnswerIndex": 1,
"explanation": "Low variance in first component indicates no single direction dominates; variance is more uniform across features."
},
{
"id": 40,
"questionText": "Scenario: You perform PCA on features measured in different units. Why important?",
"options": [
"To generate labels",
"To increase explained variance",
"To remove features",
"To prevent unit differences from skewing components"
],
"correctAnswerIndex": 3,
"explanation": "Standardization equalizes units, ensuring PCA reflects intrinsic data structure rather than measurement scale."
},
{
"id": 41,
"questionText": "Scenario: Projecting data back from reduced PCA components to original space. Effect?",
"options": [
"Perfect reconstruction always",
"Approximate reconstruction with some information loss",
"Increase variance",
"Remove correlations"
],
"correctAnswerIndex": 1,
"explanation": "Dimensionality reduction discards minor components, so reconstruction is approximate."
},
{
"id": 42,
"questionText": "Scenario: PCA on sparse high-dimensional data. Which method can help?",
"options": [
"Sparse PCA",
"Random projection",
"Feature scaling",
"Standard PCA only"
],
"correctAnswerIndex": 0,
"explanation": "Sparse PCA introduces sparsity constraints to handle high-dimensional data efficiently."
},
{
"id": 43,
"questionText": "Scenario: Kernel PCA vs standard PCA. Advantage?",
"options": [
"Kernel PCA reduces variance",
"Kernel PCA does not reduce dimensions",
"Kernel PCA captures non-linear patterns",
"Standard PCA is better for non-linear data"
],
"correctAnswerIndex": 2,
"explanation": "Kernel PCA uses kernel functions to capture non-linear relationships, unlike linear PCA."
},
{
"id": 44,
"questionText": "Scenario: PCA applied to dataset, but covariance matrix is singular. Cause?",
"options": [
"Data standardized",
"Number of features > number of samples",
"Explained variance too high",
"Too few features"
],
"correctAnswerIndex": 1,
"explanation": "A singular covariance matrix occurs when the data matrix has more features than samples, causing linear dependency."
},
{
"id": 45,
"questionText": "Scenario: After PCA, a component has zero eigenvalue. Meaning?",
"options": [
"Data is invalid",
"No variance along this component",
"PCA failed",
"Most important component"
],
"correctAnswerIndex": 1,
"explanation": "Zero eigenvalue indicates the component captures no variation in the dataset."
},
{
"id": 46,
"questionText": "Scenario: PCA used for feature reduction in regression. Benefit?",
"options": [
"Removes labels",
"Increases overfitting",
"Generates noise",
"Reduces multicollinearity and model complexity"
],
"correctAnswerIndex": 3,
"explanation": "PCA produces uncorrelated features, mitigating multicollinearity and simplifying models."
},
{
"id": 47,
"questionText": "Scenario: PCA shows first 3 components explain 85% variance. Choice?",
"options": [
"Keep 3 components for reduced dataset",
"Keep only first",
"Discard all 3",
"Keep all original features"
],
"correctAnswerIndex": 0,
"explanation": "Top components capturing majority variance are sufficient for dimensionality reduction."
},
{
"id": 48,
"questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?",
"options": [
"Important variance discarded in low components",
"Features were not standardized",
"Too few principal components retained",
"All of the above"
],
"correctAnswerIndex": 3,
"explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features."
},
{
"id": 49,
"questionText": "Scenario: PCA on image dataset. First component represents lighting changes. Meaning?",
"options": [
"Reduce dataset size",
"Lighting has no effect",
"Largest variance is due to lighting, not object content",
"PCA failed"
],
"correctAnswerIndex": 2,
"explanation": "PCA captures directions of maximal variance; if lighting dominates, first component encodes lighting."
},
{
"id": 50,
"questionText": "Scenario: You need interpretability for principal components. Technique?",
"options": [
"Examine feature loadings",
"Discard low variance components",
"Use explained variance only",
"Standardize data"
],
"correctAnswerIndex": 0,
"explanation": "Loadings show contribution of each original feature to principal components, aiding interpretation."
},
{
"id": 51,
"questionText": "Scenario: PCA applied on time-series features. Issue?",
"options": [
"Eigenvalues become negative",
"Labels are affected",
"Variance is increased",
"Temporal structure may be lost"
],
"correctAnswerIndex": 3,
"explanation": "Standard PCA ignores sequence information; temporal relationships may not be preserved."
},
{
"id": 52,
"questionText": "Scenario: Dataset contains categorical variables. PCA requirement?",
"options": [
"Convert to numerical via encoding",
"Remove them",
"No change required",
"Use labels directly"
],
"correctAnswerIndex": 0,
"explanation": "PCA requires numeric input; categorical features must be encoded first (e.g., one-hot encoding)."
},
{
"id": 53,
"questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?",
"options": [
"Increases data size",
"Reduces noise and speeds computation",
"Removes clusters",
"Generates labels"
],
"correctAnswerIndex": 1,
"explanation": "PCA simplifies data, removes redundant features, and accelerates clustering algorithms."
},
{
"id": 54,
"questionText": "Scenario: After PCA, components are used in regression. Advantage?",
"options": [
"Remove labels",
"Avoid multicollinearity and improve stability",
"Increases overfitting",
"Increase computation"
],
"correctAnswerIndex": 1,
"explanation": "Principal components are uncorrelated, reducing multicollinearity in regression."
},
{
"id": 55,
"questionText": "Scenario: PCA applied to normalized vs standardized features. Impact?",
"options": [
"No impact",
"Normalized features fail PCA",
"Standardization is crucial for unequal scales",
"Variance is reduced"
],
"correctAnswerIndex": 2,
"explanation": "Features with different scales must be standardized; normalization alone may not equalize contribution."
},
{
"id": 56,
"questionText": "Scenario: First principal component explains 40% variance, second 25%. What % variance is left?",
"options": [
"40%",
"35%",
"25%",
"65%"
],
"correctAnswerIndex": 1,
"explanation": "Remaining variance = 100% - (40% + 25%) = 35%."
},
{
"id": 57,
"questionText": "Scenario: PCA used on finance dataset. First component dominated by one stock. Meaning?",
"options": [
"Data should be reduced",
"This stock has highest variance in data",
"Stock is irrelevant",
"PCA failed"
],
"correctAnswerIndex": 1,
"explanation": "Principal components capture directions of maximum variance; one high-variance stock can dominate."
},
{
"id": 58,
"questionText": "Scenario: PCA applied on small dataset. Risk?",
"options": [
"PCA fails",
"Components become identical",
"Overfitting and noisy components",
"Variance increases"
],
"correctAnswerIndex": 2,
"explanation": "Small datasets may produce unstable covariance estimates, leading to noisy components."
},
{
"id": 59,
"questionText": "Scenario: You want to project new data using previously fitted PCA. Method?",
"options": [
"Cannot project new data",
"Recompute PCA",
"Multiply new data by learned component matrix",
"Use labels only"
],
"correctAnswerIndex": 2,
"explanation": "New data can be projected by applying the PCA transformation learned from training data."
},
{
"id": 60,
"questionText": "Scenario: PCA shows negative loadings. Meaning?",
"options": [
"Error in computation",
"Variance is negative",
"Feature removed",
"Feature negatively correlates with component"
],
"correctAnswerIndex": 3,
"explanation": "Negative loadings indicate the original feature moves in opposite direction to the component."
},
{
"id": 61,
"questionText": "Scenario: PCA applied to very high-dimensional genomic data. Challenge?",
"options": [
"Variance is too high",
"Cannot compute eigenvectors",
"Labels cannot be used",
"Covariance matrix may be singular or noisy"
],
"correctAnswerIndex": 3,
"explanation": "High dimensionality with few samples can make the covariance matrix singular and PCA unstable."
},
{
"id": 62,
"questionText": "Scenario: PCA on dataset with outliers. Effect?",
"options": [
"PCA removes outliers",
"Outliers may distort principal components",
"Outliers are ignored automatically",
"Variance increases uniformly"
],
"correctAnswerIndex": 1,
"explanation": "Outliers can dominate variance, affecting directions of principal components."
},
{
"id": 63,
"questionText": "Scenario: PCA applied for compression. Target explained variance?",
"options": [
"Keep all components",
"Keep only first component",
"Choose enough components to capture 90–95% variance",
"Discard top components"
],
"correctAnswerIndex": 2,
"explanation": "Selecting components that retain most variance ensures compression without losing significant information."
},
{
"id": 64,
"questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?",
"options": [
"PCA cannot be applied to numeric data",
"Too few principal components retained",
"All of the above",
"Features were not standardized",
"Important variance discarded in low components"
],
"correctAnswerIndex": 2,
"explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features."
},
{
"id": 65,
"questionText": "Scenario: PCA used for exploratory analysis. Benefit?",
"options": [
"Removes labels",
"Increases dimensionality",
"Generates random features",
"Reveals patterns, clusters, and correlations"
],
"correctAnswerIndex": 3,
"explanation": "PCA simplifies data and highlights underlying patterns or groupings."
},
{
"id": 66,
"questionText": "Scenario: PCA reduces dataset from 50 to 10 features. Effect on storage?",
"options": [
"Significant reduction in storage and computation",
"Increases storage",
"No change",
"Removes labels"
],
"correctAnswerIndex": 0,
"explanation": "Fewer features reduce memory usage and accelerate computations."
},
{
"id": 67,
"questionText": "Scenario: PCA used on correlation matrix. Advantage?",
"options": [
"Reduces variance",
"Generates labels",
"Equalizes feature scales and emphasizes relative relationships",
"Removes low-variance features only"
],
"correctAnswerIndex": 2,
"explanation": "Correlation matrix ensures features with different units or scales contribute proportionally to PCA."
},
{
"id": 68,
"questionText": "Scenario: After PCA, some features have nearly zero loadings across components. Meaning?",
"options": [
"Variance is too high",
"These features contribute little variance and can be discarded",
"They are most important",
"PCA failed"
],
"correctAnswerIndex": 1,
"explanation": "Features with negligible loadings do not influence principal components significantly."
},
{
"id": 69,
"questionText": "Scenario: PCA applied to dataset with correlated noise. Effect?",
"options": [
"All variance captured by noise",
"Noise may form separate low-variance components",
"PCA fails",
"Noise dominates first component"
],
"correctAnswerIndex": 1,
"explanation": "Correlated noise often appears in later components with low variance."
},
{
"id": 70,
"questionText": "Scenario: You want to reduce dimensionality without losing much information. PCA strategy?",
"options": [
"Use all components",
"Keep enough components to capture desired variance (e.g., 90–95%)",
"Keep only first component",
"Discard components randomly"
],
"correctAnswerIndex": 1,
"explanation": "Selecting enough principal components ensures dimensionality reduction while retaining most data information."
},
{
"id": 71,
"questionText": "Scenario: Kernel PCA is used instead of standard PCA. Benefit?",
"options": [
"Reduces dimensionality linearly",
"Captures non-linear relationships in the data",
"Removes outliers automatically",
"Generates labels"
],
"correctAnswerIndex": 1,
"explanation": "Kernel PCA uses kernel functions to map data into higher-dimensional space to capture non-linear patterns."
},
{
"id": 72,
"questionText": "Scenario: Sparse PCA is applied on high-dimensional genomic data. Advantage?",
"options": [
"Improves label prediction automatically",
"Maximizes variance only",
"Generates components with few non-zero loadings for interpretability",
"Removes all correlations"
],
"correctAnswerIndex": 2,
"explanation": "Sparse PCA introduces sparsity constraints, creating components influenced by fewer original features for easier interpretation."
},
{
"id": 73,
"questionText": "Scenario: PCA is applied to compress image data. How to measure quality of compression?",
"options": [
"Variance ratio only",
"Correlation of first component with pixels",
"Reconstruction error (difference between original and reconstructed images)",
"Number of components kept"
],
"correctAnswerIndex": 2,
"explanation": "Reconstruction error quantifies information loss during dimensionality reduction, evaluating compression quality."
},
{
"id": 74,
"questionText": "Scenario: PCA applied to multicollinear financial features. Effect?",
"options": [
"Increases collinearity",
"Reduces multicollinearity by generating uncorrelated components",
"Removes variance",
"Generates labels"
],
"correctAnswerIndex": 1,
"explanation": "Principal components are orthogonal, addressing multicollinearity issues in regression or predictive models."
},
{
"id": 75,
"questionText": "Scenario: PCA applied on large sparse document-term matrix. Challenge?",
"options": [
"PCA cannot be applied",
"High dimensionality and sparsity require optimized algorithms",
"All features dominate equally",
"Variance becomes negative"
],
"correctAnswerIndex": 1,
"explanation": "Sparse high-dimensional data may need techniques like randomized PCA to efficiently compute components."
},
{
"id": 76,
"questionText": "Scenario: PCA applied to time-series data. Concern?",
"options": [
"Labels are changed",
"Temporal correlations may be ignored",
"PCA reduces samples",
"Variance increases"
],
"correctAnswerIndex": 1,
"explanation": "PCA does not account for order in sequences; temporal patterns may be lost."
},
{
"id": 77,
"questionText": "Scenario: PCA reduces 100 features to 5 components. Downstream model performance drops. Likely cause?",
"options": [
"All of the above",
"Data not standardized",
"Too few samples",
"Important low-variance features were discarded"
],
"correctAnswerIndex": 0,
"explanation": "Discarding low-variance features may remove predictive information; other preprocessing issues can also affect performance."
},
{
"id": 78,
"questionText": "Scenario: PCA is used for anomaly detection. Approach?",
"options": [
"Discard all components",
"Use first component only",
"Model normal data with top components and examine reconstruction error",
"Apply PCA on labels"
],
"correctAnswerIndex": 2,
"explanation": "Anomalies often lie in directions of low variance; reconstruction error from PCA can identify unusual data points."
},
{
"id": 79,
"questionText": "Scenario: You apply PCA on a dataset with missing values. Best approach?",
"options": [
"PCA fills missing values automatically",
"Ignore missing values",
"Impute missing values before PCA",
"Discard rows with missing values"
],
"correctAnswerIndex": 2,
"explanation": "PCA requires complete numerical data; missing values must be imputed or handled before applying PCA."
},
{
"id": 80,
"questionText": "Scenario: PCA applied and first component aligns with single feature. Interpretation?",
"options": [
"This feature dominates variance in the dataset",
"Component is irrelevant",
"All features are equally important",
"PCA failed"
],
"correctAnswerIndex": 0,
"explanation": "When a single feature dominates variance, the first principal component aligns closely with that feature."
},
{
"id": 81,
"questionText": "Scenario: You perform PCA and observe negative explained variance ratio for a component. Reason?",
"options": [
"Data was not mean-centered properly",
"Eigenvectors are invalid",
"Variance is negative",
"PCA cannot run on this data"
],
"correctAnswerIndex": 0,
"explanation": "Improper centering can lead to incorrect covariance matrix, causing negative variance calculations."
},
{
"id": 82,
"questionText": "Scenario: PCA applied on data with categorical features encoded as one-hot. Concern?",
"options": [
"Variance decreases automatically",
"Labels are affected",
"Components become identical",
"High dimensionality may lead to sparse components"
],
"correctAnswerIndex": 3,
"explanation": "One-hot encoding increases dimensions, producing sparse data; special handling or sparse PCA may be useful."
},
{
"id": 83,
"questionText": "Scenario: After PCA, you plot a biplot. Purpose?",
"options": [
"Scale data",
"Generate labels",
"Visualize principal components and feature contributions",
"Remove low-variance components"
],
"correctAnswerIndex": 2,
"explanation": "Biplots show both projected data points and how original features contribute to components."
},
{
"id": 84,
"questionText": "Scenario: PCA applied on standardized vs non-standardized data with different scales. Outcome?",
"options": [
"Standardization ensures fair contribution; non-standardized may bias components",
"Non-standardized data improves variance",
"No difference",
"Variance is reduced in standardized data"
],
"correctAnswerIndex": 0,
"explanation": "Features with large scales dominate components without standardization, skewing PCA results."
},
{
"id": 85,
"questionText": "Scenario: You want interpretable PCA components. Which approach?",
"options": [
"Keep all components",
"Sparse PCA or rotation methods like varimax",
"Use first component only",
"Discard low-variance features"
],
"correctAnswerIndex": 1,
"explanation": "Sparse PCA and rotation techniques improve interpretability by reducing the number of features contributing to each component."
},
{
"id": 86,
"questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?",
"options": [
"Removes noise, reduces computation, highlights clusters",
"Increases dimensionality",
"Generates labels",
"Removes clusters"
],
"correctAnswerIndex": 0,
"explanation": "Reduced, de-noised features simplify clustering and often improve performance."
},
{
"id": 87,
"questionText": "Scenario: PCA eigenvectors are not unique. Reason?",
"options": [
"PCA failed",
"Covariance matrix invalid",
"Eigenvectors are unique up to sign; direction can flip",
"Variance negative"
],
"correctAnswerIndex": 2,
"explanation": "Eigenvectors can be multiplied by -1 without changing the subspace, so they are not unique in sign."
},
{
"id": 88,
"questionText": "Scenario: PCA applied to data where noise dominates variance. Risk?",
"options": [
"Components may represent noise rather than signal",
"Variance reduces",
"All information preserved",
"Components become identical"
],
"correctAnswerIndex": 0,
"explanation": "High-variance noise can dominate principal components, reducing meaningful representation of data."
},
{
"id": 89,
"questionText": "Scenario: PCA applied on dataset with many features having zero variance. Effect?",
"options": [
"These features are ignored in covariance computation",
"PCA fails",
"Variance increases",
"Components become identical"
],
"correctAnswerIndex": 0,
"explanation": "Zero-variance features do not contribute to covariance and do not affect PCA results."
},
{
"id": 90,
"questionText": "Scenario: PCA applied to compress hyperspectral image. Key consideration?",
"options": [
"Retain components capturing most spectral variance for accurate reconstruction",
"Discard high-variance components",
"Keep only first component",
"Generate labels automatically"
],
"correctAnswerIndex": 0,
"explanation": "Hyperspectral data has many correlated channels; top components capture essential information while reducing data size."
},
{
"id": 91,
"questionText": "Scenario: PCA applied and first component is dominated by outlier. Solution?",
"options": [
"Keep data as-is",
"Remove or mitigate outliers before PCA",
"Scale only first feature",
"Discard PCA entirely"
],
"correctAnswerIndex": 1,
"explanation": "Outliers can skew variance and principal directions; handling them ensures meaningful PCA components."
},
{
"id": 92,
"questionText": "Scenario: You need to project streaming data onto PCA components. Requirement?",
"options": [
"Project only first sample",
"Cannot apply PCA",
"Recompute PCA each time",
"Use incremental PCA or precomputed components"
],
"correctAnswerIndex": 3,
"explanation": "Incremental PCA allows efficient projection of new data without recomputing from scratch."
},
{
"id": 93,
"questionText": "Scenario: PCA reduces dimensionality but variance explained is too low. Solution?",
"options": [
"Remove first component",
"Normalize data again",
"Keep more components",
"Discard components"
],
"correctAnswerIndex": 2,
"explanation": "Increasing number of components ensures more variance is retained for downstream tasks."
},
{
"id": 94,
"questionText": "Scenario: PCA applied on correlated features with different scales. Effect if not standardized?",
"options": [
"Variance is evenly distributed",
"Components are orthogonal",
"PCA fails",
"Large-scale features dominate components"
],
"correctAnswerIndex": 3,
"explanation": "Without standardization, features with larger numeric ranges contribute more variance, skewing PCA results."
},
{
"id": 95,
"questionText": "Scenario: PCA used for dimensionality reduction prior to deep learning. Benefit?",
"options": [
"Generates labels",
"Increases overfitting",
"Reduces input size and noise, improving training efficiency",
"Removes all variance"
],
"correctAnswerIndex": 2,
"explanation": "PCA simplifies input features, removing redundant information and reducing computational load."
},
{
"id": 96,
"questionText": "Scenario: PCA applied but some components are highly correlated. Why unusual?",
"options": [
"Principal components should be orthogonal; correlation indicates an issue",
"Variance is low",
"Labels are influencing components",
"Expected in standard PCA"
],
"correctAnswerIndex": 0,
"explanation": "PCA produces orthogonal components; correlated components suggest computation or preprocessing errors."
},
{
"id": 97,
"questionText": "Scenario: PCA applied on multi-class dataset for visualization. Approach?",
"options": [
"Discard labels",
"Keep all features",
"Project onto top 2 or 3 components and color points by class",
"Use only first component"
],
"correctAnswerIndex": 2,
"explanation": "Low-dimensional projection allows visualization of class separation while preserving maximal variance."
},
{
"id": 98,
"questionText": "Scenario: PCA applied with top components explaining 80% variance. Downstream task requires 95%. Action?",
"options": [
"Normalize data again",
"Use only top components",
"Include additional components until 95% variance is captured",
"Discard low components"
],
"correctAnswerIndex": 2,
"explanation": "Selecting enough components ensures sufficient information is retained for downstream analysis."
},
{
"id": 99,
"questionText": "Scenario: PCA applied to text embeddings. Challenge?",
"options": [
"High dimensionality and sparsity require careful computation",
"Variance is negative",
"PCA fails automatically",
"Components lose meaning entirely"
],
"correctAnswerIndex": 0,
"explanation": "Text embeddings are often high-dimensional; PCA helps reduce size but may require optimized algorithms."
},
{
"id": 100,
"questionText": "Scenario: PCA used for feature selection. How to choose components?",
"options": [
"Choose random components",
"Discard high-variance components",
"Use only first component",
"Select components explaining desired variance threshold (e.g., 90–95%)"
],
"correctAnswerIndex": 3,
"explanation": "Selecting top components ensures maximal retained information while reducing dimensionality."
}
]
}