lenti237 [~/perltest]>more file4.pl #!/opt/local/bin/perl -w # file.pl $filename = shift; unless (defined($filename) ) { print "What sequence to analyze? "; $filename = <STDIN>; chomp ($filename); } # open file and associate it with a "filehandle" # if open fails (returns "false"), inform user and end unless (open (PROTEINFILE, $filename)) { print "Cannot open file: $filename \n\n"; exit; } #initialize (set to an initial value) scalar string variable $seq $seq = ""; #while there are unread lines in PROTEINFILE #read one line and, if the first character is a ">", print it # regular expressions (regexps) are enclosed in / /s. # ^ in a regexp means first character # ^> means first character is a ">" # s/ / / means replace (substitute) one regular expression for another # \s means white space (tab, blank space, new line, etc.) # s/\s// means replace a white space character by nothing (delete it) # s/\s//g means do the substitution as many times as possible (globally) # without the g, the substitution is only done once each string # =~ is the binding operator, used to assign regexp to variables while (<PROTEINFILE>) { if (/^>/) { print $seq,"\n"; $seq = ""; print $_; } else { chomp $_; $_ =~ s/\s//g; $seq = $seq . $_; } # . is the string concatenation operator } print $seq,"\n"; exit; lenti238 [~/perltest]>perl file4.pl What sequence to analyze? tests.pep >A28086 Length: 104 VKQIESKSAFQEVLDSAGDKLVVVDFSATWCGPCKMIKPFFHALSEKFNNVVFIEVDVDDCKDIAAECEVKCMPTFQFFK KGQKVGEFSGANKEKLEATINELL >gi|79882|pir||B29504 hypothetical 18K protein (mer operon) - Staphylococcus aur eus plasmid pI258 MKKRISFTAIMTVLLIGLTACGAESDTANESKVQDIQGNPVSLPNEKPTLIYFMATWCPSCIYNEEIFKEMHQLNPNDVQ LITVSLDPNTDTKESLAKFKQDYGGDWPHVLKNGKEIADTYGVKQLEEIVLVNSENEVFYRSVRPSFDDLKEALTQIGVE L >ORF15 S. 
aureus pathogenicity island MEDVTNEEVFEMIDSRTGVLNANDWKSQLRRSATTQALKKTTTNAEIILCNDESLKGLVQYDAFEKVTKLKRLPYWRSKG DTNYYWADIDTTHVISHIDKLYNVQFSRDLIDTVIEKEAYQNRFHPIKSMIESKSWDGIKRIETLFIDYLGAEDNHYNRE VTKKWMMGAVARIYQPGIKYDSMIILYGGQGVGKSTAVSKLGGHWYNQSIKTFKGDEVYKKLQGSWICEIEELSAFQKST IEDIKGFISAIVDIYRASYGKRTERHPRQCVFVGTTNNYEFLKDQTGNRRFFPITTDKNKATKSPFDDLTPDVVQQMFAE AKVYFDEDPTDKALLLDKEASEMALKVQEAHSEKDALVGEIEEFLERPIPSDYWYRTLEEKRVSAHDVIDQDYIKLYGDG KLIELPNTKPGAYVWRDKVCSMEIWKVMMKRDDQPQQHHLRKIDKALRNTNYCDTVKKQTRYGEGIGKQYGFSVDLASYY KNLKV lenti239 [~/perltest]>perl file4.pl tests.pep >A28086 Length: 104 VKQIESKSAFQEVLDSAGDKLVVVDFSATWCGPCKMIKPFFHALSEKFNNVVFIEVDVDDCKDIAAECEVKCMPTFQFFK KGQKVGEFSGANKEKLEATINELL >gi|79882|pir||B29504 hypothetical 18K protein (mer operon) - Staphylococcus aur eus plasmid pI258 MKKRISFTAIMTVLLIGLTACGAESDTANESKVQDIQGNPVSLPNEKPTLIYFMATWCPSCIYNEEIFKEMHQLNPNDVQ LITVSLDPNTDTKESLAKFKQDYGGDWPHVLKNGKEIADTYGVKQLEEIVLVNSENEVFYRSVRPSFDDLKEALTQIGVE L >ORF15 S. aureus pathogenicity island MEDVTNEEVFEMIDSRTGVLNANDWKSQLRRSATTQALKKTTTNAEIILCNDESLKGLVQYDAFEKVTKLKRLPYWRSKG DTNYYWADIDTTHVISHIDKLYNVQFSRDLIDTVIEKEAYQNRFHPIKSMIESKSWDGIKRIETLFIDYLGAEDNHYNRE VTKKWMMGAVARIYQPGIKYDSMIILYGGQGVGKSTAVSKLGGHWYNQSIKTFKGDEVYKKLQGSWICEIEELSAFQKST IEDIKGFISAIVDIYRASYGKRTERHPRQCVFVGTTNNYEFLKDQTGNRRFFPITTDKNKATKSPFDDLTPDVVQQMFAE AKVYFDEDPTDKALLLDKEASEMALKVQEAHSEKDALVGEIEEFLERPIPSDYWYRTLEEKRVSAHDVIDQDYIKLYGDG KLIELPNTKPGAYVWRDKVCSMEIWKVMMKRDDQPQQHHLRKIDKALRNTNYCDTVKKQTRYGEGIGKQYGFSVDLASYY KNLKV lenti240 [~/perltest]>